From af2fdcc2a07dc3ce16107fab7b7f09f721b9ea36 Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Mon, 5 May 2025 15:11:40 -0400 Subject: [PATCH 1/5] [GlobalISel][AMDGPU] Fix handling of v2i128 type for AND, OR, XOR Change-Id: I709d434e111f61e867c4fc284f1f4e768a083015 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 15 ++- .../CodeGen/AMDGPU/GlobalISel/and.v2i128.ll | 117 ++++++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/or.v2i128.ll | 117 ++++++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll | 117 ++++++++++++++++++ 4 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ff8658ed82a72..e8063d54ac65a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -119,6 +119,18 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) { }; } +static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getElementType(); + const int Size = Ty.getSizeInBits(); + const int EltSize = EltTy.getSizeInBits(); + const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32; + const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize; + return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, TargetEltSize)); + }; +} + // Increase the number of vector elements to reach the next multiple of 32-bit // type. 
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { @@ -875,7 +887,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) .clampScalar(0, S32, S64) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0)) + .fewerElementsIf(all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0)) + .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), breakCurrentEltsToSize32Or64(0)) .widenScalarToNextPow2(0) .scalarize(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll new file mode 100644 index 0000000000000..532a797094d14 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s + +define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_and_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: 
v_and_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_and_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_and_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX10-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %and = and <2 x i128> %a, %b + ret <2 x i128> %and +} + +define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_and_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b64 s[4:5], 64 +; GFX7-NEXT: s_mov_b64 s[6:7], 0 +; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX7-NEXT: v_and_b32_e32 v3, s7, v3 +; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_and_b32_e32 v5, s5, v5 +; GFX7-NEXT: 
v_and_b32_e32 v6, s6, v6 +; GFX7-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_and_v2i128_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], 64 +; GFX9-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX9-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX9-NEXT: v_and_b32_e32 v3, s7, v3 +; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX9-NEXT: v_and_b32_e32 v5, s5, v5 +; GFX9-NEXT: v_and_b32_e32 v6, s6, v6 +; GFX9-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_and_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[4:5], 64 +; GFX8-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX8-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX8-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX8-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX8-NEXT: v_and_b32_e32 v3, s7, v3 +; GFX8-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX8-NEXT: v_and_b32_e32 v5, s5, v5 +; GFX8-NEXT: v_and_b32_e32 v6, s6, v6 +; GFX8-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %and = and <2 x i128> %a, + ret <2 x i128> %and +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll new file mode 100644 index 0000000000000..eaba0500dc1f3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck 
-check-prefix=GFX8 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s + +define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_or_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_or_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_or_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_or_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX10-NEXT: 
v_or_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %or = or <2 x i128> %a, %b + ret <2 x i128> %or +} + +define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_or_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b64 s[4:5], 64 +; GFX7-NEXT: s_mov_b64 s[6:7], 0 +; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX7-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_or_b32_e32 v1, s5, v1 +; GFX7-NEXT: v_or_b32_e32 v2, s6, v2 +; GFX7-NEXT: v_or_b32_e32 v3, s7, v3 +; GFX7-NEXT: v_or_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_or_b32_e32 v5, s5, v5 +; GFX7-NEXT: v_or_b32_e32 v6, s6, v6 +; GFX7-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_or_v2i128_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], 64 +; GFX9-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX9-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_or_b32_e32 v1, s5, v1 +; GFX9-NEXT: v_or_b32_e32 v2, s6, v2 +; GFX9-NEXT: v_or_b32_e32 v3, s7, v3 +; GFX9-NEXT: v_or_b32_e32 v4, s4, v4 +; GFX9-NEXT: v_or_b32_e32 v5, s5, v5 +; GFX9-NEXT: v_or_b32_e32 v6, s6, v6 +; GFX9-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_or_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[4:5], 64 +; GFX8-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX8-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s5, v1 +; GFX8-NEXT: v_or_b32_e32 v2, s6, v2 +; GFX8-NEXT: v_or_b32_e32 v3, s7, v3 +; GFX8-NEXT: v_or_b32_e32 v4, s4, v4 +; GFX8-NEXT: v_or_b32_e32 v5, s5, v5 +; GFX8-NEXT: v_or_b32_e32 v6, s6, v6 +; GFX8-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX8-NEXT: 
s_setpc_b64 s[30:31] + %or = or <2 x i128> %a, + ret <2 x i128> %or +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll new file mode 100644 index 0000000000000..291d27b0cf527 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s + +define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_xor_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_xor_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_xor_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_xor_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i128> %a, %b + ret <2 x i128> %xor +} + +define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_xor_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b64 s[4:5], 64 +; GFX7-NEXT: s_mov_b64 s[6:7], 0 +; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1 +; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2 +; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3 +; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5 +; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6 +; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_xor_v2i128_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], 64 +; GFX9-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1 +; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2 +; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3 +; GFX9-NEXT: 
v_xor_b32_e32 v4, s4, v4 +; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5 +; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6 +; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_xor_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[4:5], 64 +; GFX8-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] +; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1 +; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 +; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3 +; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4 +; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5 +; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 +; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i128> %a, + ret <2 x i128> %xor +} From ca16db8d7dbcafd4ef79cfbae84355e6f28f553d Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Mon, 5 May 2025 16:23:15 -0400 Subject: [PATCH 2/5] [GlobalISel][AMDGPU] Fix formatting Change-Id: If980ba1599f9eb805ae4ba0566d1e0c5459ff8ff --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e8063d54ac65a..f78cf66c25436 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -884,13 +884,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Report legal for any types we can handle anywhere. For the cases only legal // on the SALU, RegBankSelect will be able to re-legalize. 
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) - .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) - .clampScalar(0, S32, S64) - .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .fewerElementsIf(all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0)) - .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), breakCurrentEltsToSize32Or64(0)) - .widenScalarToNextPow2(0) - .scalarize(0); + .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) + .clampScalar(0, S32, S64) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .fewerElementsIf( + all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), + fewerEltsToSize64Vector(0)) + .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), + breakCurrentEltsToSize32Or64(0)) + .widenScalarToNextPow2(0) + .scalarize(0); getActionDefinitionsBuilder( {G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE}) From 4027444ed8494e47595f45bf7c5032c2c3405f7d Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Mon, 5 May 2025 16:48:37 -0400 Subject: [PATCH 3/5] [GlobalISel][AMDGPU] Address comments Change-Id: Ia82e785f936dae63180b62a297b5cd2a1d1b8bf3 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 --- .../CodeGen/AMDGPU/GlobalISel/and.v2i128.ll | 97 ++++++++++++------- .../CodeGen/AMDGPU/GlobalISel/or.v2i128.ll | 71 +++++++------- .../CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll | 71 +++++++------- 4 files changed, 128 insertions(+), 125 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f78cf66c25436..bf994877f3a4a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -119,18 +119,6 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) { }; } -static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) { - return [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[TypeIdx]; - 
const LLT EltTy = Ty.getElementType(); - const int Size = Ty.getSizeInBits(); - const int EltSize = EltTy.getSizeInBits(); - const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32; - const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize; - return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, TargetEltSize)); - }; -} - // Increase the number of vector elements to reach the next multiple of 32-bit // type. static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { @@ -890,8 +878,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .fewerElementsIf( all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0)) - .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), - breakCurrentEltsToSize32Or64(0)) .widenScalarToNextPow2(0) .scalarize(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll index 532a797094d14..064ac89e539c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX7-LABEL: v_and_v2i128: @@ -57,6 +57,19 @@ define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX10-NEXT: v_and_b32_e32 v6, v6, v14 ; GFX10-NEXT: v_and_b32_e32 v7, v7, v15 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_and_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX11-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i128> %a, %b ret <2 x i128> %and } @@ -65,53 +78,63 @@ define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { ; GFX7-LABEL: v_and_v2i128_inline_imm: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b64 s[4:5], 64 -; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_and_b32_e32 v3, s7, v3 -; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 -; GFX7-NEXT: v_and_b32_e32 v5, s5, v5 -; GFX7-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX7-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX7-NEXT: v_mov_b32_e32 v1, 0 +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, 0 +; GFX7-NEXT: v_mov_b32_e32 v6, 0 +; GFX7-NEXT: v_mov_b32_e32 v7, 0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_and_v2i128_inline_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[4:5], 64 -; GFX9-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX9-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX9-NEXT: v_and_b32_e32 v3, s7, v3 -; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 -; GFX9-NEXT: v_and_b32_e32 v5, s5, v5 -; GFX9-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX9-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX9-NEXT: v_and_b32_e32 v0, 64, v0 +; 
GFX9-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_and_v2i128_inline_imm: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b64 s[4:5], 64 -; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX8-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX8-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX8-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX8-NEXT: v_and_b32_e32 v3, s7, v3 -; GFX8-NEXT: v_and_b32_e32 v4, s4, v4 -; GFX8-NEXT: v_and_b32_e32 v5, s5, v5 -; GFX8-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX8-NEXT: v_and_b32_e32 v7, s7, v7 +; GFX8-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, 0 +; GFX8-NEXT: v_mov_b32_e32 v6, 0 +; GFX8-NEXT: v_mov_b32_e32 v7, 0 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_and_v2i128_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 0 +; GFX10-NEXT: v_mov_b32_e32 v6, 0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_and_v2i128_inline_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0 +; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0 +; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 +; GFX11-NEXT: s_setpc_b64 
s[30:31] %and = and <2 x i128> %a, ret <2 x i128> %and } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll index eaba0500dc1f3..029fcadb51634 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX7-LABEL: v_or_v2i128: @@ -57,6 +57,19 @@ define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX10-NEXT: v_or_b32_e32 v6, v6, v14 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v15 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_or_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] %or = or <2 x i128> %a, %b ret <2 x i128> %or } @@ -65,53 +78,37 @@ define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) { ; GFX7-LABEL: v_or_v2i128_inline_imm: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b64 s[4:5], 64 -; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX7-NEXT: v_or_b32_e32 v0, s4, v0 -; 
GFX7-NEXT: v_or_b32_e32 v1, s5, v1 -; GFX7-NEXT: v_or_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_or_b32_e32 v3, s7, v3 -; GFX7-NEXT: v_or_b32_e32 v4, s4, v4 -; GFX7-NEXT: v_or_b32_e32 v5, s5, v5 -; GFX7-NEXT: v_or_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX7-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_or_b32_e32 v4, 64, v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_or_v2i128_inline_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[4:5], 64 -; GFX9-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9-NEXT: v_or_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_or_b32_e32 v1, s5, v1 -; GFX9-NEXT: v_or_b32_e32 v2, s6, v2 -; GFX9-NEXT: v_or_b32_e32 v3, s7, v3 -; GFX9-NEXT: v_or_b32_e32 v4, s4, v4 -; GFX9-NEXT: v_or_b32_e32 v5, s5, v5 -; GFX9-NEXT: v_or_b32_e32 v6, s6, v6 -; GFX9-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX9-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX9-NEXT: v_or_b32_e32 v4, 64, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_or_v2i128_inline_imm: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b64 s[4:5], 64 -; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX8-NEXT: v_or_b32_e32 v0, s4, v0 -; GFX8-NEXT: v_or_b32_e32 v1, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v2, s6, v2 -; GFX8-NEXT: v_or_b32_e32 v3, s7, v3 -; GFX8-NEXT: v_or_b32_e32 v4, s4, v4 -; GFX8-NEXT: v_or_b32_e32 v5, s5, v5 -; GFX8-NEXT: v_or_b32_e32 v6, s6, v6 -; GFX8-NEXT: v_or_b32_e32 v7, s7, v7 +; GFX8-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_or_b32_e32 v4, 64, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_or_v2i128_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_or_v2i128_inline_imm: +; GFX11: 
; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX11-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] %or = or <2 x i128> %a, ret <2 x i128> %or } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll index 291d27b0cf527..2b5139783a144 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX7-LABEL: v_xor_v2i128: @@ -57,6 +57,19 @@ define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { ; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14 ; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_xor_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX11-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] %xor = xor <2 x i128> %a, %b ret <2 x i128> %xor } @@ -65,53 +78,37 @@ define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) { ; GFX7-LABEL: v_xor_v2i128_inline_imm: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
-; GFX7-NEXT: s_mov_b64 s[4:5], 64 -; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1 -; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3 -; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4 -; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5 -; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX7-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_xor_b32_e32 v4, 64, v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_xor_v2i128_inline_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[4:5], 64 -; GFX9-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1 -; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2 -; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3 -; GFX9-NEXT: v_xor_b32_e32 v4, s4, v4 -; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5 -; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6 -; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX9-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX9-NEXT: v_xor_b32_e32 v4, 64, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_xor_v2i128_inline_imm: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b64 s[4:5], 64 -; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5] -; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1 -; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 -; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3 -; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4 -; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5 -; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 -; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7 +; GFX8-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_xor_b32_e32 v4, 64, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_xor_v2i128_inline_imm: +; GFX10: ; %bb.0: 
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_xor_v2i128_inline_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX11-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] %xor = xor <2 x i128> %a, <i128 64, i128 64> ret <2 x i128> %xor } From 475de1de26c0e1b111171f011613112cf5d5a07c Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Tue, 6 May 2025 16:09:04 -0400 Subject: [PATCH 4/5] [GlobalISel][AMDGPU] Update tests Change-Id: I28f0273da90b5caa9133762ab272ede95a7dd82e --- llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll | 134 +++++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/and.v2i128.ll | 140 ------------------ llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll | 108 ++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/or.v2i128.ll | 114 -------------- llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll | 109 ++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll | 114 -------------- 6 files changed, 351 insertions(+), 368 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll index ed3720a950b38..87f39b6d2b604 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll @@ -834,6 +834,140 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr store i64 %and, ptr addrspace(1) %out, align 8 ret void } + +define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_and_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_and_b32_e32 v1, v1, v9 +; 
GFX7-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_and_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_and_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_and_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX10-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_and_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_and_b32_e32 v5, 
v5, v13 +; GFX11-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %and = and <2 x i128> %a, %b + ret <2 x i128> %and +} + +define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_and_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX7-NEXT: v_mov_b32_e32 v1, 0 +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, 0 +; GFX7-NEXT: v_mov_b32_e32 v6, 0 +; GFX7-NEXT: v_mov_b32_e32 v7, 0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_and_v2i128_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX9-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_and_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, 0 +; GFX8-NEXT: v_mov_b32_e32 v6, 0 +; GFX8-NEXT: v_mov_b32_e32 v7, 0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_and_v2i128_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_and_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_and_b32_e32 v4, 64, v4 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 0 +; GFX10-NEXT: v_mov_b32_e32 v6, 0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX11-LABEL: v_and_v2i128_inline_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0 +; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0 +; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %and = and <2 x i128> %a, <i128 64, i128 64> + ret <2 x i128> %and +} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX11-FAKE16: {{.*}} ; GFX11-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll deleted file mode 100644 index 064ac89e539c0..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll +++ /dev/null @@ -1,140 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s - -define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: v_and_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: 
v_and_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX9-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_and_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_and_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX10-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_and_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX11-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %and = and <2 x i128> %a, %b - ret <2 x i128> %and -} - -define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_and_v2i128_inline_imm: -; 
GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX7-NEXT: v_mov_b32_e32 v1, 0 -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0 -; GFX7-NEXT: v_mov_b32_e32 v5, 0 -; GFX7-NEXT: v_mov_b32_e32 v6, 0 -; GFX7-NEXT: v_mov_b32_e32 v7, 0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_and_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX9-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, 0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_and_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_mov_b32_e32 v3, 0 -; GFX8-NEXT: v_mov_b32_e32 v5, 0 -; GFX8-NEXT: v_mov_b32_e32 v6, 0 -; GFX8-NEXT: v_mov_b32_e32 v7, 0 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_and_v2i128_inline_imm: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX10-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-NEXT: v_mov_b32_e32 v6, 0 -; GFX10-NEXT: v_mov_b32_e32 v7, 0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_and_v2i128_inline_imm: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0 -; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4 -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 
0 -; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %and = and <2 x i128> %a, <i128 64, i128 64> - ret <2 x i128> %and -} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll index df034d82118b1..83b80bd5c50eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll @@ -838,6 +838,114 @@ define amdgpu_kernel void @s_or_u64_sext_with_sregs(ptr addrspace(1) %out, ptr a store i64 %or, ptr addrspace(1) %out, align 8 ret void } + +define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_or_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_or_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_or_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_or_v2i128: +; GFX10: ; 
%bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX10-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_or_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %or = or <2 x i128> %a, %b + ret <2 x i128> %or +} + +define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_or_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_or_v2i128_inline_imm: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX9-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_or_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_or_v2i128_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_or_v2i128_inline_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX11-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX11-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %or = or <2 x i128> %a, <i128 64, i128 64> + ret <2 x i128> %or +} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX11-FAKE16: {{.*}} ; GFX11-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll deleted file mode 100644 index 029fcadb51634..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll +++ /dev/null @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s - -define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: v_or_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_or_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX9-NEXT: 
v_or_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_or_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_or_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX10-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_or_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %or = or <2 x i128> %a, %b - ret <2 x i128> %or -} - -define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_or_v2i128_inline_imm: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_or_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v0, 64, v0 -; 
GFX9-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_or_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_or_v2i128_inline_imm: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX10-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_or_v2i128_inline_imm: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX11-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %or = or <2 x i128> %a, <i128 64, i128 64> - ret <2 x i128> %or -} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll index b27a35ce0753a..8e10078ce8b2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll @@ -838,6 +838,115 @@ define amdgpu_kernel void @s_xor_u64_sext_with_sregs(ptr addrspace(1) %out, ptr store i64 %xor, ptr addrspace(1) %out, align 8 ret void } + +define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { +; GFX7-LABEL: v_xor_v2i128: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_xor_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11 +; 
GFX9-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_xor_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_xor_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_xor_v2i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX11-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX11-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX11-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX11-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX11-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX11-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX11-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i128> %a, %b + ret <2 x i128> %xor +} + +define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) { +; GFX7-LABEL: v_xor_v2i128_inline_imm: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX7-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_xor_v2i128_inline_imm: +; GFX9: ; %bb.0: +; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX9-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_xor_v2i128_inline_imm: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX8-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_xor_v2i128_inline_imm: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX10-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_xor_v2i128_inline_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX11-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i128> %a, <i128 64, i128 64> + ret <2 x i128> %xor +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX11-FAKE16: {{.*}} ; GFX11-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll deleted file mode 100644 index 2b5139783a144..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll +++ /dev/null @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s - -define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: 
v_xor_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_xor_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_xor_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_xor_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_xor_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_xor_b32_e32 
v1, v1, v9 -; GFX11-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %xor = xor <2 x i128> %a, %b - ret <2 x i128> %xor -} - -define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_xor_v2i128_inline_imm: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_xor_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX9-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_xor_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_xor_v2i128_inline_imm: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX10-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_xor_v2i128_inline_imm: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX11-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %xor = xor <2 x i128> %a, <i128 64, i128 64> - ret <2 x i128> %xor -} From 73137e3558670bb7c0ea69deb632fc7c99960e7c Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Tue, 6 May 2025 16:27:35 -0400 Subject: [PATCH 5/5] [GlobalISel][AMDGPU] Update tests Change-Id: I25afdc40b28be4201473923ff527062e9c76f19f --- llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll | 362 +++++++++++++++------ llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll | 341 ++++++++++++++----- 
llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll | 340 ++++++++++++++----- 3 files changed, 764 insertions(+), 279 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll index 87f39b6d2b604..18578c55697cf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll @@ -836,113 +836,65 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr } define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: v_and_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_and_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX9-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_and_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_and_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX10-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX10-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_and_v2i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, v0, v8 +; GCN-NEXT: v_and_b32_e32 v1, v1, v9 +; GCN-NEXT: v_and_b32_e32 v2, v2, v10 +; GCN-NEXT: v_and_b32_e32 v3, v3, v11 +; GCN-NEXT: v_and_b32_e32 v4, v4, v12 +; GCN-NEXT: v_and_b32_e32 v5, v5, v13 +; GCN-NEXT: v_and_b32_e32 v6, v6, v14 +; GCN-NEXT: v_and_b32_e32 v7, v7, v15 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_and_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_and_b32_e32 v1, v1, v9 -; GFX11-NEXT: v_and_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_and_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_and_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_and_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_and_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_and_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_and_v2i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_and_v2i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; 
GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX12-NEXT: v_and_b32_e32 v1, v1, v9 +; GFX12-NEXT: v_and_b32_e32 v2, v2, v10 +; GFX12-NEXT: v_and_b32_e32 v3, v3, v11 +; GFX12-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX12-NEXT: v_and_b32_e32 v5, v5, v13 +; GFX12-NEXT: v_and_b32_e32 v6, v6, v14 +; GFX12-NEXT: v_and_b32_e32 v7, v7, v15 +; GFX12-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i128> %a, %b ret <2 x i128> %and } define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_and_v2i128_inline_imm: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX7-NEXT: v_mov_b32_e32 v1, 0 -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0 -; GFX7-NEXT: v_mov_b32_e32 v5, 0 -; GFX7-NEXT: v_mov_b32_e32 v6, 0 -; GFX7-NEXT: v_mov_b32_e32 v7, 0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_and_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX9-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_mov_b32_e32 v7, 0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_and_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 64, v4 -; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_mov_b32_e32 v3, 0 -; GFX8-NEXT: v_mov_b32_e32 v5, 0 -; GFX8-NEXT: v_mov_b32_e32 v6, 0 -; GFX8-NEXT: v_mov_b32_e32 v7, 0 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_and_v2i128_inline_imm: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 64, v0 +; GCN-NEXT: v_and_b32_e32 v4, 64, v4 +; GCN-NEXT: 
v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: v_mov_b32_e32 v6, 0 +; GCN-NEXT: v_mov_b32_e32 v7, 0 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_and_v2i128_inline_imm: ; GFX10: ; %bb.0: @@ -965,9 +917,217 @@ define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) { ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_and_v2i128_inline_imm: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0 +; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4 +; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0 +; GFX12-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 +; GFX12-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i128> %a, ret <2 x i128> %and } + +define <3 x i128> @v_and_v3i128(<3 x i128> %a, <3 x i128> %b) { +; GCN-LABEL: v_and_v3i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, v0, v12 +; GCN-NEXT: v_and_b32_e32 v1, v1, v13 +; GCN-NEXT: v_and_b32_e32 v2, v2, v14 +; GCN-NEXT: v_and_b32_e32 v3, v3, v15 +; GCN-NEXT: v_and_b32_e32 v4, v4, v16 +; GCN-NEXT: v_and_b32_e32 v5, v5, v17 +; GCN-NEXT: v_and_b32_e32 v6, v6, v18 +; GCN-NEXT: v_and_b32_e32 v7, v7, v19 +; GCN-NEXT: v_and_b32_e32 v8, v8, v20 +; GCN-NEXT: v_and_b32_e32 v9, v9, v21 +; GCN-NEXT: v_and_b32_e32 v10, v10, v22 +; GCN-NEXT: v_and_b32_e32 v11, v11, v23 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_and_v3i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v12 +; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v13 +; GFX10PLUS-NEXT: v_and_b32_e32 
v2, v2, v14 +; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v15 +; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v16 +; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v17 +; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v18 +; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v19 +; GFX10PLUS-NEXT: v_and_b32_e32 v8, v8, v20 +; GFX10PLUS-NEXT: v_and_b32_e32 v9, v9, v21 +; GFX10PLUS-NEXT: v_and_b32_e32 v10, v10, v22 +; GFX10PLUS-NEXT: v_and_b32_e32 v11, v11, v23 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_and_v3i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v0, v0, v12 +; GFX12-NEXT: v_and_b32_e32 v1, v1, v13 +; GFX12-NEXT: v_and_b32_e32 v2, v2, v14 +; GFX12-NEXT: v_and_b32_e32 v3, v3, v15 +; GFX12-NEXT: v_and_b32_e32 v4, v4, v16 +; GFX12-NEXT: v_and_b32_e32 v5, v5, v17 +; GFX12-NEXT: v_and_b32_e32 v6, v6, v18 +; GFX12-NEXT: v_and_b32_e32 v7, v7, v19 +; GFX12-NEXT: v_and_b32_e32 v8, v8, v20 +; GFX12-NEXT: v_and_b32_e32 v9, v9, v21 +; GFX12-NEXT: v_and_b32_e32 v10, v10, v22 +; GFX12-NEXT: v_and_b32_e32 v11, v11, v23 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %and = and <3 x i128> %a, %b + ret <3 x i128> %and +} + +define <1 x i128> @v_and_v1i128(<1 x i128> %a, <1 x i128> %b) { +; GCN-LABEL: v_and_v1i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, v0, v4 +; GCN-NEXT: v_and_b32_e32 v1, v1, v5 +; GCN-NEXT: v_and_b32_e32 v2, v2, v6 +; GCN-NEXT: v_and_b32_e32 v3, v3, v7 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_and_v1i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5 +; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6 +; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_and_v1i128: +; 
GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX12-NEXT: v_and_b32_e32 v1, v1, v5 +; GFX12-NEXT: v_and_b32_e32 v2, v2, v6 +; GFX12-NEXT: v_and_b32_e32 v3, v3, v7 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %and = and <1 x i128> %a, %b + ret <1 x i128> %and +} + +define <2 x i256> @v_and_v2i256(<2 x i256> %a, <2 x i256> %b) { +; GCN-LABEL: v_and_v2i256: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, v0, v16 +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GCN-NEXT: v_and_b32_e32 v1, v1, v17 +; GCN-NEXT: v_and_b32_e32 v2, v2, v18 +; GCN-NEXT: v_and_b32_e32 v3, v3, v19 +; GCN-NEXT: v_and_b32_e32 v4, v4, v20 +; GCN-NEXT: v_and_b32_e32 v5, v5, v21 +; GCN-NEXT: v_and_b32_e32 v6, v6, v22 +; GCN-NEXT: v_and_b32_e32 v7, v7, v23 +; GCN-NEXT: v_and_b32_e32 v8, v8, v24 +; GCN-NEXT: v_and_b32_e32 v9, v9, v25 +; GCN-NEXT: v_and_b32_e32 v10, v10, v26 +; GCN-NEXT: v_and_b32_e32 v11, v11, v27 +; GCN-NEXT: v_and_b32_e32 v12, v12, v28 +; GCN-NEXT: v_and_b32_e32 v13, v13, v29 +; GCN-NEXT: v_and_b32_e32 v14, v14, v30 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_and_b32_e32 v15, v15, v16 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_and_v2i256: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-NEXT: v_and_b32_e32 v0, v0, v16 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v17 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v18 +; GFX10-NEXT: v_and_b32_e32 v3, v3, v19 +; GFX10-NEXT: v_and_b32_e32 v4, v4, v20 +; GFX10-NEXT: v_and_b32_e32 v5, v5, v21 +; GFX10-NEXT: v_and_b32_e32 v6, v6, v22 +; GFX10-NEXT: v_and_b32_e32 v7, v7, v23 +; GFX10-NEXT: v_and_b32_e32 v8, v8, v24 +; GFX10-NEXT: v_and_b32_e32 v9, v9, v25 +; GFX10-NEXT: v_and_b32_e32 v10, v10, v26 +; GFX10-NEXT: 
v_and_b32_e32 v11, v11, v27 +; GFX10-NEXT: v_and_b32_e32 v12, v12, v28 +; GFX10-NEXT: v_and_b32_e32 v13, v13, v29 +; GFX10-NEXT: v_and_b32_e32 v14, v14, v30 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_and_b32_e32 v15, v15, v31 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_and_v2i256: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-NEXT: v_and_b32_e32 v0, v0, v16 +; GFX11-NEXT: v_and_b32_e32 v1, v1, v17 +; GFX11-NEXT: v_and_b32_e32 v2, v2, v18 +; GFX11-NEXT: v_and_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_and_b32_e32 v4, v4, v20 +; GFX11-NEXT: v_and_b32_e32 v5, v5, v21 +; GFX11-NEXT: v_and_b32_e32 v6, v6, v22 +; GFX11-NEXT: v_and_b32_e32 v7, v7, v23 +; GFX11-NEXT: v_and_b32_e32 v8, v8, v24 +; GFX11-NEXT: v_and_b32_e32 v9, v9, v25 +; GFX11-NEXT: v_and_b32_e32 v10, v10, v26 +; GFX11-NEXT: v_and_b32_e32 v11, v11, v27 +; GFX11-NEXT: v_and_b32_e32 v12, v12, v28 +; GFX11-NEXT: v_and_b32_e32 v13, v13, v29 +; GFX11-NEXT: v_and_b32_e32 v14, v14, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v15, v15, v31 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_and_v2i256: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-NEXT: v_and_b32_e32 v0, v0, v16 +; GFX12-NEXT: v_and_b32_e32 v1, v1, v17 +; GFX12-NEXT: v_and_b32_e32 v2, v2, v18 +; GFX12-NEXT: v_and_b32_e32 v3, v3, v19 +; GFX12-NEXT: v_and_b32_e32 v4, v4, v20 +; GFX12-NEXT: v_and_b32_e32 v5, v5, v21 +; GFX12-NEXT: v_and_b32_e32 v6, v6, v22 +; GFX12-NEXT: v_and_b32_e32 v7, v7, v23 +; GFX12-NEXT: v_and_b32_e32 v8, v8, v24 +; GFX12-NEXT: v_and_b32_e32 v9, v9, v25 +; GFX12-NEXT: v_and_b32_e32 v10, v10, v26 +; GFX12-NEXT: v_and_b32_e32 v11, v11, v27 +; GFX12-NEXT: v_and_b32_e32 v12, v12, v28 +; GFX12-NEXT: 
v_and_b32_e32 v13, v13, v29 +; GFX12-NEXT: v_and_b32_e32 v14, v14, v30 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v15, v15, v31 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %and = and <2 x i256> %a, %b + ret <2 x i256> %and +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX11-FAKE16: {{.*}} ; GFX11-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll index 83b80bd5c50eb..af377b1d76817 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll @@ -840,112 +840,275 @@ define amdgpu_kernel void @s_or_u64_sext_with_sregs(ptr addrspace(1) %out, ptr a } define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: v_or_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_or_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX9-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_or_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX8-NEXT: 
v_or_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_or_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX10-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_or_v2i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v0, v0, v8 +; GCN-NEXT: v_or_b32_e32 v1, v1, v9 +; GCN-NEXT: v_or_b32_e32 v2, v2, v10 +; GCN-NEXT: v_or_b32_e32 v3, v3, v11 +; GCN-NEXT: v_or_b32_e32 v4, v4, v12 +; GCN-NEXT: v_or_b32_e32 v5, v5, v13 +; GCN-NEXT: v_or_b32_e32 v6, v6, v14 +; GCN-NEXT: v_or_b32_e32 v7, v7, v15 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_or_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_or_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_or_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_or_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_or_v2i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX10PLUS-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX10PLUS-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX10PLUS-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX10PLUS-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX10PLUS-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX12-LABEL: v_or_v2i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX12-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX12-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX12-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX12-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX12-NEXT: v_or_b32_e32 v5, v5, v13 +; GFX12-NEXT: v_or_b32_e32 v6, v6, v14 +; GFX12-NEXT: v_or_b32_e32 v7, v7, v15 +; GFX12-NEXT: s_setpc_b64 s[30:31] %or = or <2 x i128> %a, %b ret <2 x i128> %or } define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_or_v2i128_inline_imm: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_or_v2i128_inline_imm: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v0, 64, v0 +; GCN-NEXT: v_or_b32_e32 v4, 64, v4 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_or_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX9-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_or_v2i128_inline_imm: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX10PLUS-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: v_or_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_or_b32_e32 v4, 64, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX12-LABEL: v_or_v2i128_inline_imm: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_or_b32_e32 v0, 64, v0 +; GFX12-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %or = or <2 x i128> %a, + ret <2 x i128> %or +} + +define <3 x i128> @v_or_v3i128(<3 x i128> %a, <3 x i128> %b) { +; GCN-LABEL: v_or_v3i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v0, v0, v12 +; GCN-NEXT: v_or_b32_e32 v1, v1, v13 +; GCN-NEXT: v_or_b32_e32 v2, v2, v14 +; GCN-NEXT: v_or_b32_e32 v3, v3, v15 +; GCN-NEXT: v_or_b32_e32 v4, v4, v16 +; GCN-NEXT: v_or_b32_e32 v5, v5, v17 +; GCN-NEXT: v_or_b32_e32 v6, v6, v18 +; GCN-NEXT: v_or_b32_e32 v7, v7, v19 +; GCN-NEXT: v_or_b32_e32 v8, v8, v20 +; GCN-NEXT: v_or_b32_e32 v9, v9, v21 +; GCN-NEXT: v_or_b32_e32 v10, v10, v22 +; GCN-NEXT: v_or_b32_e32 v11, v11, v23 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_or_v3i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v12 +; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v13 +; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v14 +; GFX10PLUS-NEXT: v_or_b32_e32 v3, v3, v15 +; GFX10PLUS-NEXT: v_or_b32_e32 v4, v4, v16 +; GFX10PLUS-NEXT: v_or_b32_e32 v5, v5, v17 +; GFX10PLUS-NEXT: v_or_b32_e32 v6, v6, v18 +; GFX10PLUS-NEXT: v_or_b32_e32 v7, v7, v19 +; GFX10PLUS-NEXT: v_or_b32_e32 v8, v8, v20 +; GFX10PLUS-NEXT: v_or_b32_e32 v9, v9, v21 +; GFX10PLUS-NEXT: v_or_b32_e32 v10, v10, v22 +; GFX10PLUS-NEXT: v_or_b32_e32 v11, v11, v23 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_or_v2i128_inline_imm: +; GFX12-LABEL: v_or_v3i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_or_b32_e32 v0, v0, v12 +; GFX12-NEXT: v_or_b32_e32 v1, v1, v13 +; GFX12-NEXT: v_or_b32_e32 v2, v2, v14 +; GFX12-NEXT: v_or_b32_e32 v3, v3, v15 +; 
GFX12-NEXT: v_or_b32_e32 v4, v4, v16 +; GFX12-NEXT: v_or_b32_e32 v5, v5, v17 +; GFX12-NEXT: v_or_b32_e32 v6, v6, v18 +; GFX12-NEXT: v_or_b32_e32 v7, v7, v19 +; GFX12-NEXT: v_or_b32_e32 v8, v8, v20 +; GFX12-NEXT: v_or_b32_e32 v9, v9, v21 +; GFX12-NEXT: v_or_b32_e32 v10, v10, v22 +; GFX12-NEXT: v_or_b32_e32 v11, v11, v23 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %or = or <3 x i128> %a, %b + ret <3 x i128> %or +} + +define <1 x i128> @v_or_v1i128(<1 x i128> %a, <1 x i128> %b) { +; GCN-LABEL: v_or_v1i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v0, v0, v4 +; GCN-NEXT: v_or_b32_e32 v1, v1, v5 +; GCN-NEXT: v_or_b32_e32 v2, v2, v6 +; GCN-NEXT: v_or_b32_e32 v3, v3, v7 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_or_v1i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX10PLUS-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_or_v1i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX12-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX12-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX12-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %or = or <1 x i128> %a, %b + ret <1 x i128> %or +} + +define <2 x i256> @v_or_v2i256(<2 x i256> %a, <2 x i256> %b) { +; GCN-LABEL: v_or_v2i256: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v0, v0, v16 +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GCN-NEXT: v_or_b32_e32 v1, v1, v17 +; GCN-NEXT: v_or_b32_e32 v2, v2, v18 +; GCN-NEXT: v_or_b32_e32 v3, v3, v19 +; GCN-NEXT: v_or_b32_e32 v4, v4, v20 +; GCN-NEXT: v_or_b32_e32 v5, v5, v21 
+; GCN-NEXT: v_or_b32_e32 v6, v6, v22 +; GCN-NEXT: v_or_b32_e32 v7, v7, v23 +; GCN-NEXT: v_or_b32_e32 v8, v8, v24 +; GCN-NEXT: v_or_b32_e32 v9, v9, v25 +; GCN-NEXT: v_or_b32_e32 v10, v10, v26 +; GCN-NEXT: v_or_b32_e32 v11, v11, v27 +; GCN-NEXT: v_or_b32_e32 v12, v12, v28 +; GCN-NEXT: v_or_b32_e32 v13, v13, v29 +; GCN-NEXT: v_or_b32_e32 v14, v14, v30 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_or_v2i256: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX10-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v16 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v17 +; GFX10-NEXT: v_or_b32_e32 v2, v2, v18 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX10-NEXT: v_or_b32_e32 v4, v4, v20 +; GFX10-NEXT: v_or_b32_e32 v5, v5, v21 +; GFX10-NEXT: v_or_b32_e32 v6, v6, v22 +; GFX10-NEXT: v_or_b32_e32 v7, v7, v23 +; GFX10-NEXT: v_or_b32_e32 v8, v8, v24 +; GFX10-NEXT: v_or_b32_e32 v9, v9, v25 +; GFX10-NEXT: v_or_b32_e32 v10, v10, v26 +; GFX10-NEXT: v_or_b32_e32 v11, v11, v27 +; GFX10-NEXT: v_or_b32_e32 v12, v12, v28 +; GFX10-NEXT: v_or_b32_e32 v13, v13, v29 +; GFX10-NEXT: v_or_b32_e32 v14, v14, v30 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_or_b32_e32 v15, v15, v31 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_or_v2i128_inline_imm: +; GFX11-LABEL: v_or_v2i256: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_or_b32_e32 v0, 64, v0 -; GFX11-NEXT: v_or_b32_e32 v4, 64, v4 +; GFX11-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v16 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v17 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v18 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_or_b32_e32 v4, v4, v20 +; GFX11-NEXT: v_or_b32_e32 v5, v5, v21 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v22 +; GFX11-NEXT: v_or_b32_e32 v7, v7, 
v23 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v24 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v25 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v26 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v27 +; GFX11-NEXT: v_or_b32_e32 v12, v12, v28 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v29 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_or_b32_e32 v15, v15, v31 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %or = or <2 x i128> %a, - ret <2 x i128> %or +; +; GFX12-LABEL: v_or_v2i256: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-NEXT: v_or_b32_e32 v0, v0, v16 +; GFX12-NEXT: v_or_b32_e32 v1, v1, v17 +; GFX12-NEXT: v_or_b32_e32 v2, v2, v18 +; GFX12-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX12-NEXT: v_or_b32_e32 v4, v4, v20 +; GFX12-NEXT: v_or_b32_e32 v5, v5, v21 +; GFX12-NEXT: v_or_b32_e32 v6, v6, v22 +; GFX12-NEXT: v_or_b32_e32 v7, v7, v23 +; GFX12-NEXT: v_or_b32_e32 v8, v8, v24 +; GFX12-NEXT: v_or_b32_e32 v9, v9, v25 +; GFX12-NEXT: v_or_b32_e32 v10, v10, v26 +; GFX12-NEXT: v_or_b32_e32 v11, v11, v27 +; GFX12-NEXT: v_or_b32_e32 v12, v12, v28 +; GFX12-NEXT: v_or_b32_e32 v13, v13, v29 +; GFX12-NEXT: v_or_b32_e32 v14, v14, v30 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_or_b32_e32 v15, v15, v31 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %or = or <2 x i256> %a, %b + ret <2 x i256> %or } + ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; GFX11-FAKE16: {{.*}} ; GFX11-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll index 8e10078ce8b2c..4755da1392684 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll @@ -840,111 +840,273 @@ define amdgpu_kernel void @s_xor_u64_sext_with_sregs(ptr addrspace(1) %out, ptr } define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) { -; GFX7-LABEL: v_xor_v2i128: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_xor_v2i128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_xor_v2i128: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_xor_v2i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, 
v0, v8 -; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_xor_v2i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v0, v0, v8 +; GCN-NEXT: v_xor_b32_e32 v1, v1, v9 +; GCN-NEXT: v_xor_b32_e32 v2, v2, v10 +; GCN-NEXT: v_xor_b32_e32 v3, v3, v11 +; GCN-NEXT: v_xor_b32_e32 v4, v4, v12 +; GCN-NEXT: v_xor_b32_e32 v5, v5, v13 +; GCN-NEXT: v_xor_b32_e32 v6, v6, v14 +; GCN-NEXT: v_xor_b32_e32 v7, v7, v15 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_xor_v2i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_xor_b32_e32 v0, v0, v8 -; GFX11-NEXT: v_xor_b32_e32 v1, v1, v9 -; GFX11-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX11-NEXT: v_xor_b32_e32 v3, v3, v11 -; GFX11-NEXT: v_xor_b32_e32 v4, v4, v12 -; GFX11-NEXT: v_xor_b32_e32 v5, v5, v13 -; GFX11-NEXT: v_xor_b32_e32 v6, v6, v14 -; GFX11-NEXT: v_xor_b32_e32 v7, v7, v15 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_xor_v2i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX10PLUS-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX10PLUS-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX10PLUS-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX10PLUS-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX10PLUS-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX10PLUS-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX10PLUS-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_xor_v2i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: 
v_xor_b32_e32 v0, v0, v8 +; GFX12-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX12-NEXT: v_xor_b32_e32 v2, v2, v10 +; GFX12-NEXT: v_xor_b32_e32 v3, v3, v11 +; GFX12-NEXT: v_xor_b32_e32 v4, v4, v12 +; GFX12-NEXT: v_xor_b32_e32 v5, v5, v13 +; GFX12-NEXT: v_xor_b32_e32 v6, v6, v14 +; GFX12-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX12-NEXT: s_setpc_b64 s[30:31] %xor = xor <2 x i128> %a, %b ret <2 x i128> %xor } define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) { -; GFX7-LABEL: v_xor_v2i128_inline_imm: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX7-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_xor_v2i128_inline_imm: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v0, 64, v0 +; GCN-NEXT: v_xor_b32_e32 v4, 64, v4 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_xor_v2i128_inline_imm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX9-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_xor_v2i128_inline_imm: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX10PLUS-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: v_xor_v2i128_inline_imm: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX8-NEXT: v_xor_b32_e32 v4, 64, v4 -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GFX12-LABEL: v_xor_v2i128_inline_imm: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_xor_b32_e32 v0, 64, v0 +; GFX12-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i128> %a, + ret <2 x 
i128> %xor +} + +define <3 x i128> @v_xor_v3i128(<3 x i128> %a, <3 x i128> %b) { +; GCN-LABEL: v_xor_v3i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v0, v0, v12 +; GCN-NEXT: v_xor_b32_e32 v1, v1, v13 +; GCN-NEXT: v_xor_b32_e32 v2, v2, v14 +; GCN-NEXT: v_xor_b32_e32 v3, v3, v15 +; GCN-NEXT: v_xor_b32_e32 v4, v4, v16 +; GCN-NEXT: v_xor_b32_e32 v5, v5, v17 +; GCN-NEXT: v_xor_b32_e32 v6, v6, v18 +; GCN-NEXT: v_xor_b32_e32 v7, v7, v19 +; GCN-NEXT: v_xor_b32_e32 v8, v8, v20 +; GCN-NEXT: v_xor_b32_e32 v9, v9, v21 +; GCN-NEXT: v_xor_b32_e32 v10, v10, v22 +; GCN-NEXT: v_xor_b32_e32 v11, v11, v23 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_xor_v3i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_xor_b32_e32 v0, v0, v12 +; GFX10PLUS-NEXT: v_xor_b32_e32 v1, v1, v13 +; GFX10PLUS-NEXT: v_xor_b32_e32 v2, v2, v14 +; GFX10PLUS-NEXT: v_xor_b32_e32 v3, v3, v15 +; GFX10PLUS-NEXT: v_xor_b32_e32 v4, v4, v16 +; GFX10PLUS-NEXT: v_xor_b32_e32 v5, v5, v17 +; GFX10PLUS-NEXT: v_xor_b32_e32 v6, v6, v18 +; GFX10PLUS-NEXT: v_xor_b32_e32 v7, v7, v19 +; GFX10PLUS-NEXT: v_xor_b32_e32 v8, v8, v20 +; GFX10PLUS-NEXT: v_xor_b32_e32 v9, v9, v21 +; GFX10PLUS-NEXT: v_xor_b32_e32 v10, v10, v22 +; GFX10PLUS-NEXT: v_xor_b32_e32 v11, v11, v23 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_xor_v3i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_xor_b32_e32 v0, v0, v12 +; GFX12-NEXT: v_xor_b32_e32 v1, v1, v13 +; GFX12-NEXT: v_xor_b32_e32 v2, v2, v14 +; GFX12-NEXT: v_xor_b32_e32 v3, v3, v15 +; GFX12-NEXT: v_xor_b32_e32 v4, v4, v16 +; GFX12-NEXT: v_xor_b32_e32 v5, v5, v17 +; GFX12-NEXT: v_xor_b32_e32 v6, v6, v18 +; GFX12-NEXT: v_xor_b32_e32 v7, v7, v19 +; GFX12-NEXT: v_xor_b32_e32 v8, v8, v20 +; GFX12-NEXT: 
v_xor_b32_e32 v9, v9, v21 +; GFX12-NEXT: v_xor_b32_e32 v10, v10, v22 +; GFX12-NEXT: v_xor_b32_e32 v11, v11, v23 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %xor = xor <3 x i128> %a, %b + ret <3 x i128> %xor +} + +define <1 x i128> @v_xor_v1i128(<1 x i128> %a, <1 x i128> %b) { +; GCN-LABEL: v_xor_v1i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v0, v0, v4 +; GCN-NEXT: v_xor_b32_e32 v1, v1, v5 +; GCN-NEXT: v_xor_b32_e32 v2, v2, v6 +; GCN-NEXT: v_xor_b32_e32 v3, v3, v7 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10PLUS-LABEL: v_xor_v1i128: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_xor_b32_e32 v0, v0, v4 +; GFX10PLUS-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX10PLUS-NEXT: v_xor_b32_e32 v2, v2, v6 +; GFX10PLUS-NEXT: v_xor_b32_e32 v3, v3, v7 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_xor_v1i128: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_xor_b32_e32 v0, v0, v4 +; GFX12-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX12-NEXT: v_xor_b32_e32 v2, v2, v6 +; GFX12-NEXT: v_xor_b32_e32 v3, v3, v7 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %xor = xor <1 x i128> %a, %b + ret <1 x i128> %xor +} + +define <2 x i256> @v_xor_v2i256(<2 x i256> %a, <2 x i256> %b) { +; GCN-LABEL: v_xor_v2i256: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v0, v0, v16 +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GCN-NEXT: v_xor_b32_e32 v1, v1, v17 +; GCN-NEXT: v_xor_b32_e32 v2, v2, v18 +; GCN-NEXT: v_xor_b32_e32 v3, v3, v19 +; GCN-NEXT: v_xor_b32_e32 v4, v4, v20 +; GCN-NEXT: v_xor_b32_e32 v5, v5, v21 +; GCN-NEXT: v_xor_b32_e32 v6, v6, v22 +; GCN-NEXT: v_xor_b32_e32 v7, v7, v23 +; GCN-NEXT: v_xor_b32_e32 v8, v8, v24 +; GCN-NEXT: v_xor_b32_e32 v9, v9, v25 +; GCN-NEXT: 
v_xor_b32_e32 v10, v10, v26 +; GCN-NEXT: v_xor_b32_e32 v11, v11, v27 +; GCN-NEXT: v_xor_b32_e32 v12, v12, v28 +; GCN-NEXT: v_xor_b32_e32 v13, v13, v29 +; GCN-NEXT: v_xor_b32_e32 v14, v14, v30 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v15, v15, v16 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_xor_v2i128_inline_imm: +; GFX10-LABEL: v_xor_v2i256: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX10-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-NEXT: v_xor_b32_e32 v0, v0, v16 +; GFX10-NEXT: v_xor_b32_e32 v1, v1, v17 +; GFX10-NEXT: v_xor_b32_e32 v2, v2, v18 +; GFX10-NEXT: v_xor_b32_e32 v3, v3, v19 +; GFX10-NEXT: v_xor_b32_e32 v4, v4, v20 +; GFX10-NEXT: v_xor_b32_e32 v5, v5, v21 +; GFX10-NEXT: v_xor_b32_e32 v6, v6, v22 +; GFX10-NEXT: v_xor_b32_e32 v7, v7, v23 +; GFX10-NEXT: v_xor_b32_e32 v8, v8, v24 +; GFX10-NEXT: v_xor_b32_e32 v9, v9, v25 +; GFX10-NEXT: v_xor_b32_e32 v10, v10, v26 +; GFX10-NEXT: v_xor_b32_e32 v11, v11, v27 +; GFX10-NEXT: v_xor_b32_e32 v12, v12, v28 +; GFX10-NEXT: v_xor_b32_e32 v13, v13, v29 +; GFX10-NEXT: v_xor_b32_e32 v14, v14, v30 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_xor_b32_e32 v15, v15, v31 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_xor_v2i128_inline_imm: +; GFX11-LABEL: v_xor_v2i256: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_xor_b32_e32 v0, 64, v0 -; GFX11-NEXT: v_xor_b32_e32 v4, 64, v4 +; GFX11-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-NEXT: v_xor_b32_e32 v0, v0, v16 +; GFX11-NEXT: v_xor_b32_e32 v1, v1, v17 +; GFX11-NEXT: v_xor_b32_e32 v2, v2, v18 +; GFX11-NEXT: v_xor_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_xor_b32_e32 v4, v4, v20 +; GFX11-NEXT: v_xor_b32_e32 v5, v5, v21 +; GFX11-NEXT: v_xor_b32_e32 v6, v6, v22 +; GFX11-NEXT: v_xor_b32_e32 v7, v7, v23 +; GFX11-NEXT: v_xor_b32_e32 v8, v8, v24 +; GFX11-NEXT: v_xor_b32_e32 v9, v9, v25 +; 
GFX11-NEXT: v_xor_b32_e32 v10, v10, v26 +; GFX11-NEXT: v_xor_b32_e32 v11, v11, v27 +; GFX11-NEXT: v_xor_b32_e32 v12, v12, v28 +; GFX11-NEXT: v_xor_b32_e32 v13, v13, v29 +; GFX11-NEXT: v_xor_b32_e32 v14, v14, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v15, v15, v31 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %xor = xor <2 x i128> %a, - ret <2 x i128> %xor +; +; GFX12-LABEL: v_xor_v2i256: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: scratch_load_b32 v31, off, s32 +; GFX12-NEXT: v_xor_b32_e32 v0, v0, v16 +; GFX12-NEXT: v_xor_b32_e32 v1, v1, v17 +; GFX12-NEXT: v_xor_b32_e32 v2, v2, v18 +; GFX12-NEXT: v_xor_b32_e32 v3, v3, v19 +; GFX12-NEXT: v_xor_b32_e32 v4, v4, v20 +; GFX12-NEXT: v_xor_b32_e32 v5, v5, v21 +; GFX12-NEXT: v_xor_b32_e32 v6, v6, v22 +; GFX12-NEXT: v_xor_b32_e32 v7, v7, v23 +; GFX12-NEXT: v_xor_b32_e32 v8, v8, v24 +; GFX12-NEXT: v_xor_b32_e32 v9, v9, v25 +; GFX12-NEXT: v_xor_b32_e32 v10, v10, v26 +; GFX12-NEXT: v_xor_b32_e32 v11, v11, v27 +; GFX12-NEXT: v_xor_b32_e32 v12, v12, v28 +; GFX12-NEXT: v_xor_b32_e32 v13, v13, v29 +; GFX12-NEXT: v_xor_b32_e32 v14, v14, v30 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_xor_b32_e32 v15, v15, v31 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %xor = xor <2 x i256> %a, %b + ret <2 x i256> %xor } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: