diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 26149530b79aed..853d654457ef37 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4086,14 +4086,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
 
   SDValue Control;
   unsigned ROpc, MOpc;
 
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
   if (!PreferBEXTR) {
     assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
     // If we can't make use of BEXTR then we can't fuse shift+mask stages.
     // Let's perform the mask first, and apply shift later. Note that we need to
     // widen the mask to account for the fact that we'll apply shift afterwards!
     Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
-    ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
-    MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
+    ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
+                           : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
+    MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
+                           : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
     unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
     Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
   } else {
@@ -4108,8 +4111,10 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
     } else {
       assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
       // BMI requires the immediate to placed in a register.
-      ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
-      MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+      ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
+                             : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
+      MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
+                             : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
       unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
       Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
     }
@@ -5482,25 +5487,30 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     switch (NVT.SimpleTy) {
     default: llvm_unreachable("Unsupported VT!");
     case MVT::i32:
-      Opc = UseMULXHi ? X86::MULX32Hrr :
-            UseMULX ? X86::MULX32rr :
-            IsSigned ? X86::IMUL32r : X86::MUL32r;
-      MOpc = UseMULXHi ? X86::MULX32Hrm :
-             UseMULX ? X86::MULX32rm :
-             IsSigned ? X86::IMUL32m : X86::MUL32m;
+      Opc = UseMULXHi  ? X86::MULX32Hrr
+            : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
+            : IsSigned ? X86::IMUL32r
+                       : X86::MUL32r;
+      MOpc = UseMULXHi  ? X86::MULX32Hrm
+             : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
+             : IsSigned ? X86::IMUL32m
+                        : X86::MUL32m;
       LoReg = UseMULX ? X86::EDX : X86::EAX;
       HiReg = X86::EDX;
       break;
     case MVT::i64:
-      Opc = UseMULXHi ? X86::MULX64Hrr :
-            UseMULX ? X86::MULX64rr :
-            IsSigned ? X86::IMUL64r : X86::MUL64r;
-      MOpc = UseMULXHi ? X86::MULX64Hrm :
-             UseMULX ? X86::MULX64rm :
-             IsSigned ? X86::IMUL64m : X86::MUL64m;
+      Opc = UseMULXHi  ? X86::MULX64Hrr
+            : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
+            : IsSigned ? X86::IMUL64r
+                       : X86::MUL64r;
+      MOpc = UseMULXHi  ? X86::MULX64Hrm
+             : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
+             : IsSigned ? X86::IMUL64m
+                        : X86::MUL64m;
       LoReg = UseMULX ? X86::RDX : X86::RAX;
       HiReg = X86::RDX;
       break;
+#undef GET_EGPR_IF_ENABLED
     }
 
     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
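To see GET_EGPR_IF_ENABLED's effect end to end, here is a minimal sketch (hypothetical, not part of the patch; it has the same IR shape as the bextr64d test in bmi-x86_64.ll below):

; Build: llc demo.ll -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr
; Without fast-bextr this shift+mask goes down the !PreferBEXTR path above:
; the mask is 33 ones, so Control = Shift + MaskSize = 35 (the "movl $35" in
; the bextr64d checks), and ISel now picks X86::BZHI64rr_EVEX. The output
; still prints as plain "bzhiq" because the EVEX form is compressed back to
; VEX when no R16-R31 register is involved.
define i64 @bzhi_egpr_demo(i64 %x) {
  %shr = lshr i64 %x, 2
  %and = and i64 %shr, 8589934591   ; (1 << 33) - 1, too wide for an imm32 AND
  ret i64 %and
}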
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 76b0fe5f5cad18..560a4097a9c017 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1338,17 +1338,23 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
   defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
 }
 
-let Predicates = [HasBMI], AddedComplexity = -6 in {
+multiclass Andn_Pats<string suffix> {
   def : Pat<(and (not GR32:$src1), GR32:$src2),
-            (ANDN32rr GR32:$src1, GR32:$src2)>;
+            (!cast<Instruction>(ANDN32rr#suffix) GR32:$src1, GR32:$src2)>;
   def : Pat<(and (not GR64:$src1), GR64:$src2),
-            (ANDN64rr GR64:$src1, GR64:$src2)>;
+            (!cast<Instruction>(ANDN64rr#suffix) GR64:$src1, GR64:$src2)>;
   def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
-            (ANDN32rm GR32:$src1, addr:$src2)>;
+            (!cast<Instruction>(ANDN32rm#suffix) GR32:$src1, addr:$src2)>;
   def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
-            (ANDN64rm GR64:$src1, addr:$src2)>;
+            (!cast<Instruction>(ANDN64rm#suffix) GR64:$src1, addr:$src2)>;
 }
 
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in
+  defm : Andn_Pats<"">;
+
+let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in
+  defm : Andn_Pats<"_EVEX">;
+
 //===----------------------------------------------------------------------===//
 // MULX Instruction
 //
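As a quick sanity check on the new Andn_Pats instantiations, a minimal IR sample (hypothetical function name; the same pattern is covered by andn32/andn64 in bmi.ll below):

; Build: llc andn.ll -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding
; Under [HasBMI, HasEGPR] this selects ANDN32rr_EVEX; as the bmi.ll checks
; below show, it is still emitted as "andnl" with an "EVEX TO VEX
; Compression" encoding comment since only legacy registers are used here.
define i32 @andn_demo(i32 %x, i32 %y) {
  %not = xor i32 %x, -1              ; (not GR32:$src1)
  %and = and i32 %not, %y            ; matches (and (not GR32:$src1), GR32:$src2)
  ret i32 %and
}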
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 753cf62392a17b..80cddc570b8427 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,43 +1241,49 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
   defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
 }
 
-let Predicates = [HasBMI] in {
+multiclass Bls_Pats<string suffix> {
   // FIXME(1): patterns for the load versions are not implemented
   // FIXME(2): By only matching `add_su` and `ineg_su` we may emit
   // extra `mov` instructions if `src` has future uses. It may be better
   // to always match if `src` has more users.
   def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
-            (BLSR32rr GR32:$src)>;
+            (!cast<Instruction>(BLSR32rr#suffix) GR32:$src)>;
   def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
-            (BLSR64rr GR64:$src)>;
+            (!cast<Instruction>(BLSR64rr#suffix) GR64:$src)>;
   def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
-            (BLSMSK32rr GR32:$src)>;
+            (!cast<Instruction>(BLSMSK32rr#suffix) GR32:$src)>;
   def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
-            (BLSMSK64rr GR64:$src)>;
+            (!cast<Instruction>(BLSMSK64rr#suffix) GR64:$src)>;
   def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
-            (BLSI32rr GR32:$src)>;
+            (!cast<Instruction>(BLSI32rr#suffix) GR32:$src)>;
   def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
-            (BLSI64rr GR64:$src)>;
+            (!cast<Instruction>(BLSI64rr#suffix) GR64:$src)>;
 
   // Versions to match flag producing ops.
   def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
-            (BLSR32rr GR32:$src)>;
+            (!cast<Instruction>(BLSR32rr#suffix) GR32:$src)>;
   def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
-            (BLSR64rr GR64:$src)>;
+            (!cast<Instruction>(BLSR64rr#suffix) GR64:$src)>;
   def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
-            (BLSMSK32rr GR32:$src)>;
+            (!cast<Instruction>(BLSMSK32rr#suffix) GR32:$src)>;
   def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
-            (BLSMSK64rr GR64:$src)>;
+            (!cast<Instruction>(BLSMSK64rr#suffix) GR64:$src)>;
   def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
-            (BLSI32rr GR32:$src)>;
+            (!cast<Instruction>(BLSI32rr#suffix) GR32:$src)>;
   def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
-            (BLSI64rr GR64:$src)>;
+            (!cast<Instruction>(BLSI64rr#suffix) GR64:$src)>;
 }
 
+let Predicates = [HasBMI, NoEGPR] in
+  defm : Bls_Pats<"">;
+
+let Predicates = [HasBMI, HasEGPR] in
+  defm : Bls_Pats<"_EVEX">;
+
 multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
                     X86FoldableSchedWrite sched, string Suffix = ""> {
   let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1330,7 @@ def AndMask64 : ImmLeaf<i64, [{
 
 // Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
   def : Pat<(and GR64:$src, AndMask64:$mask),
             (BEXTR64rr GR64:$src,
               (SUBREG_TO_REG (i64 0),
@@ -1335,8 +1341,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
                 (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
 }
 
+let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
+  def : Pat<(and GR64:$src, AndMask64:$mask),
+            (BEXTR64rr_EVEX GR64:$src,
+              (SUBREG_TO_REG (i64 0),
+                (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+  def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+            (BEXTR64rm_EVEX addr:$src,
+              (SUBREG_TO_REG (i64 0),
+                (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
 // Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
+let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
   def : Pat<(and GR64:$src, AndMask64:$mask),
             (BZHI64rr GR64:$src,
               (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1364,17 @@ let Predicates = [HasBMI2, NoTBM] in {
                 (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
 }
 
+let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
+  def : Pat<(and GR64:$src, AndMask64:$mask),
+            (BZHI64rr_EVEX GR64:$src,
+              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+  def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+            (BZHI64rm_EVEX addr:$src,
+              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
+
 multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC, X86MemOperand x86memop, SDPatternOperator OpNode, PatFrag ld_frag, string Suffix = ""> {
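The su (single-use) restriction called out in FIXME(2) is easy to exercise. In the sketch below (hypothetical, modeled on blsr32/blsr64 in bmi.ll), the decrement has exactly one use, so Bls_Pats fires; if %dec gained a second user, selection would instead materialize it separately (LEA plus AND), at the cost of an extra mov.

; Build: llc bls.ll -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr
define i32 @blsr_demo(i32 %x) {
  %dec = sub i32 %x, 1               ; single use, so add_su matches
  %and = and i32 %x, %dec            ; (and GR32:$src, (add_su GR32:$src, -1)) -> BLSR32rr_EVEX
  ret i32 %and
}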
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index c76e8cc4d12988..7166e0bc39179c 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -284,32 +284,32 @@ defm SHRX64: ShiftX<"shrx", Xi64>, XD;
 defm SHLX32: ShiftX<"shlx", Xi32>, PD;
 defm SHLX64: ShiftX<"shlx", Xi64>, PD;
 
-multiclass RORX_Pats {
+multiclass RORX_Pats<string suffix> {
   // Prefer RORX which is non-destructive and doesn't update EFLAGS.
   let AddedComplexity = 10 in {
     def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
-              (RORX32ri GR32:$src, imm:$shamt)>;
+              (!cast<Instruction>(RORX32ri#suffix) GR32:$src, imm:$shamt)>;
     def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
-              (RORX64ri GR64:$src, imm:$shamt)>;
+              (!cast<Instruction>(RORX64ri#suffix) GR64:$src, imm:$shamt)>;
     def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
-              (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+              (!cast<Instruction>(RORX32ri#suffix) GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
     def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
-              (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+              (!cast<Instruction>(RORX64ri#suffix) GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
   }
 
   def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
-            (RORX32mi addr:$src, imm:$shamt)>;
+            (!cast<Instruction>(RORX32mi#suffix) addr:$src, imm:$shamt)>;
   def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
-            (RORX64mi addr:$src, imm:$shamt)>;
+            (!cast<Instruction>(RORX64mi#suffix) addr:$src, imm:$shamt)>;
   def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
-            (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+            (!cast<Instruction>(RORX32mi#suffix) addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
   def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
-            (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+            (!cast<Instruction>(RORX64mi#suffix) addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
 }
 
-multiclass ShiftX_Pats<SDNode op> {
+multiclass ShiftX_Pats<SDNode op, string suffix = ""> {
   // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
   // immediate shift, i.e. the following code is considered better
   //
@@ -325,16 +325,16 @@ multiclass ShiftX_Pats<SDNode op> {
   //
   let AddedComplexity = 1 in {
     def : Pat<(op GR32:$src1, GR8:$src2),
-              (!cast<Instruction>(NAME#"32rr") GR32:$src1,
+              (!cast<Instruction>(NAME#"32rr"#suffix) GR32:$src1,
                 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
     def : Pat<(op GR64:$src1, GR8:$src2),
-              (!cast<Instruction>(NAME#"64rr") GR64:$src1,
+              (!cast<Instruction>(NAME#"64rr"#suffix) GR64:$src1,
                 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
     def : Pat<(op GR32:$src1, (shiftMask32 GR8:$src2)),
-              (!cast<Instruction>(NAME#"32rr") GR32:$src1,
+              (!cast<Instruction>(NAME#"32rr"#suffix) GR32:$src1,
                 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
     def : Pat<(op GR64:$src1, (shiftMask64 GR8:$src2)),
-              (!cast<Instruction>(NAME#"64rr") GR64:$src1,
+              (!cast<Instruction>(NAME#"64rr"#suffix) GR64:$src1,
                 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
   }
   // We prefer to use
@@ -348,22 +348,29 @@ multiclass ShiftX_Pats<SDNode op> {
   //
   // This priority is enforced by IsProfitableToFoldLoad.
   def : Pat<(op (loadi32 addr:$src1), GR8:$src2),
-            (!cast<Instruction>(NAME#"32rm") addr:$src1,
+            (!cast<Instruction>(NAME#"32rm"#suffix) addr:$src1,
               (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
   def : Pat<(op (loadi64 addr:$src1), GR8:$src2),
-            (!cast<Instruction>(NAME#"64rm") addr:$src1,
+            (!cast<Instruction>(NAME#"64rm"#suffix) addr:$src1,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
   def : Pat<(op (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
-            (!cast<Instruction>(NAME#"32rm") addr:$src1,
+            (!cast<Instruction>(NAME#"32rm"#suffix) addr:$src1,
              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
   def : Pat<(op (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
-            (!cast<Instruction>(NAME#"64rm") addr:$src1,
+            (!cast<Instruction>(NAME#"64rm"#suffix) addr:$src1,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 }
 
-let Predicates = [HasBMI2] in {
-  defm : RORX_Pats;
+let Predicates = [HasBMI2, NoEGPR] in {
+  defm : RORX_Pats<"">;
   defm SARX : ShiftX_Pats<sra>;
   defm SHRX : ShiftX_Pats<srl>;
   defm SHLX : ShiftX_Pats<shl>;
 }
+
+let Predicates = [HasBMI2, HasEGPR] in {
+  defm : RORX_Pats<"_EVEX">;
+  defm SARX : ShiftX_Pats<sra, "_EVEX">;
+  defm SHRX : ShiftX_Pats<srl, "_EVEX">;
+  defm SHLX : ShiftX_Pats<shl, "_EVEX">;
+}
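A combined sketch for the rotate and variable-shift patterns (hypothetical, in the spirit of the tests that follow): with +bmi2,+egpr the immediate rotate should come out of RORX_Pats as RORX32ri_EVEX and the variable shift out of ShiftX_Pats as SHLX32rr_EVEX, both still printed with their VEX mnemonics when only legacy registers are used.

; Build: llc shift.ll -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+egpr
define i32 @rorx_demo(i32 %x) {
  %hi = shl i32 %x, 23
  %lo = lshr i32 %x, 9
  %rot = or i32 %hi, %lo             ; folds to a rotate by 9, i.e. rorxl $9
  ret i32 %rot
}

define i32 @shlx_demo(i32 %x, i32 %amt) {
  %masked = and i32 %amt, 31         ; shiftMask32: the mask is dropped, hardware masks anyway
  %shl = shl i32 %x, %masked         ; variable shift, so shlxl rather than shll
  ret i32 %shl
}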
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll index d704f38307fcb8..5b7bb1ca97b5ca 100644 --- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c @@ -14,6 +15,13 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) { ; X64-NEXT: xorq $-1, %rax ; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__andn_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff] +; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0] +; EGPR-NEXT: retq # encoding: [0xc3] %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 ret i64 %res @@ -24,6 +32,11 @@ define i64 @test__bextr_u64(i64 %a0, i64 %a1) { ; X64: # %bb.0: ; X64-NEXT: bextrq %rsi, %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__bextr_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1) ret i64 %res } @@ -35,6 +48,13 @@ define i64 @test__blsi_u64(i64 %a0) { ; X64-NEXT: subq %rdi, %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsi_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8] +; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %neg = sub i64 0, %a0 %res = and i64 %a0, %neg ret i64 %res @@ -46,6 +66,12 @@ define i64 @test__blsmsk_u64(i64 %a0) { ; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: xorq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsmsk_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff] +; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i64 %a0, 1 %res = xor
i64 %a0, %dec ret i64 %res @@ -57,6 +83,12 @@ define i64 @test__blsr_u64(i64 %a0) { ; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsr_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff] +; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i64 %a0, 1 %res = and i64 %a0, %dec ret i64 %res @@ -67,6 +99,11 @@ define i64 @test__tzcnt_u64(i64 %a0) { ; X64: # %bb.0: ; X64-NEXT: tzcntq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test__tzcnt_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %cmp = icmp ne i64 %a0, 0 %cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false) ret i64 %cttz @@ -83,6 +120,13 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) { ; X64-NEXT: xorq $-1, %rax ; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_andn_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff] +; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0] +; EGPR-NEXT: retq # encoding: [0xc3] %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 ret i64 %res @@ -98,6 +142,16 @@ define i64 @test_bextr_u64(i64 %a0, i32 %a1, i32 %a2) { ; X64-NEXT: movl %edx, %eax ; X64-NEXT: bextrq %rax, %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_bextr_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00] +; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00] +; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08] +; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2] +; EGPR-NEXT: movl %edx, %eax # encoding: [0x89,0xd0] +; EGPR-NEXT: bextrq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %and1 = and i32 %a1, 255 %and2 = and i32 %a2, 255 %shl = shl i32 %and2, 8 @@ -114,6 +168,13 @@ define i64 @test_blsi_u64(i64 %a0) { ; X64-NEXT: subq %rdi, %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsi_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8] +; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %neg = sub i64 0, %a0 %res = and i64 %a0, %neg ret i64 %res @@ -125,6 +186,12 @@ define i64 @test_blsmsk_u64(i64 %a0) { ; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: xorq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsmsk_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff] +; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i64 %a0, 1 %res = xor i64 %a0, %dec ret i64 %res @@ -136,6 +203,12 @@ define i64 @test_blsr_u64(i64 %a0) { ; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsr_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff] +; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i64 %a0, 1 %res = and i64 %a0, %dec ret i64 %res @@ -146,6 +219,11 @@ define i64 @test_tzcnt_u64(i64 %a0) { ; X64: # %bb.0: ; X64-NEXT: tzcntq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: test_tzcnt_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7] +; EGPR-NEXT: 
retq # encoding: [0xc3] %cmp = icmp ne i64 %a0, 0 %cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false) ret i64 %cttz diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll index 58b894a9da8b6f..7dbd1bba63861e 100644 --- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64 - +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c ; @@ -23,6 +23,14 @@ define i16 @test__tzcnt_u16(i16 %a0) { ; X64-NEXT: tzcntl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__tzcnt_u16: +; EGPR: # %bb.0: +; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00] +; EGPR-NEXT: # imm = 0x10000 +; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7] +; EGPR-NEXT: # kill: def $ax killed $ax killed $eax +; EGPR-NEXT: retq # encoding: [0xc3] %zext = zext i16 %a0 to i32 %cmp = icmp ne i32 %zext, 0 %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false) @@ -43,6 +51,13 @@ define i32 @test__andn_u32(i32 %a0, i32 %a1) { ; X64-NEXT: xorl $-1, %eax ; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__andn_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff] +; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] +; EGPR-NEXT: retq # encoding: [0xc3] %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 ret i32 %res @@ -59,6 +74,11 @@ define i32 @test__bextr_u32(i32 %a0, i32 %a1) { ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__bextr_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1) ret i32 %res } @@ -78,6 +98,13 @@ define i32 @test__blsi_u32(i32 %a0) { ; X64-NEXT: subl %edi, %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsi_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8] +; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %neg = sub i32 0, %a0 %res = and i32 %a0, %neg ret i32 %res @@ -97,6 +124,13 @@ define i32 @test__blsmsk_u32(i32 %a0) { ; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: xorl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsmsk_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: # kill: def $edi killed $edi def $rdi +; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff] +; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i32 %a0, 1 %res = xor i32 %a0, %dec ret i32 %res @@ -116,6 +150,13 @@ define i32 @test__blsr_u32(i32 %a0) { ; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__blsr_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: # kill: def $edi killed $edi def $rdi +; EGPR-NEXT: leal -1(%rdi), %eax # encoding: 
[0x8d,0x47,0xff] +; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i32 %a0, 1 %res = and i32 %a0, %dec ret i32 %res @@ -131,6 +172,11 @@ define i32 @test__tzcnt_u32(i32 %a0) { ; X64: # %bb.0: ; X64-NEXT: tzcntl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test__tzcnt_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %cmp = icmp ne i32 %a0, 0 %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false) ret i32 %cttz @@ -155,6 +201,14 @@ define i16 @test_tzcnt_u16(i16 %a0) { ; X64-NEXT: tzcntl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_tzcnt_u16: +; EGPR: # %bb.0: +; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00] +; EGPR-NEXT: # imm = 0x10000 +; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7] +; EGPR-NEXT: # kill: def $ax killed $ax killed $eax +; EGPR-NEXT: retq # encoding: [0xc3] %zext = zext i16 %a0 to i32 %cmp = icmp ne i32 %zext, 0 %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false) @@ -175,6 +229,13 @@ define i32 @test_andn_u32(i32 %a0, i32 %a1) { ; X64-NEXT: xorl $-1, %eax ; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_andn_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff] +; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] +; EGPR-NEXT: retq # encoding: [0xc3] %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 ret i32 %res @@ -200,6 +261,15 @@ define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) { ; X64-NEXT: orl %esi, %edx ; X64-NEXT: bextrl %edx, %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_bextr_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00] +; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00] +; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08] +; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2] +; EGPR-NEXT: bextrl %edx, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x68,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %and1 = and i32 %a1, 255 %and2 = and i32 %a2, 255 %shl = shl i32 %and2, 8 @@ -223,6 +293,13 @@ define i32 @test_blsi_u32(i32 %a0) { ; X64-NEXT: subl %edi, %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsi_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8] +; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %neg = sub i32 0, %a0 %res = and i32 %a0, %neg ret i32 %res @@ -242,6 +319,13 @@ define i32 @test_blsmsk_u32(i32 %a0) { ; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: xorl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsmsk_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: # kill: def $edi killed $edi def $rdi +; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff] +; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = sub i32 %a0, 1 %res = xor i32 %a0, %dec ret i32 %res @@ -261,6 +345,13 @@ define i32 @test_blsr_u32(i32 %a0) { ; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_blsr_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: # kill: def $edi killed $edi def $rdi +; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff] +; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %dec = 
sub i32 %a0, 1 %res = and i32 %a0, %dec ret i32 %res @@ -276,6 +367,11 @@ define i32 @test_tzcnt_u32(i32 %a0) { ; X64: # %bb.0: ; X64-NEXT: tzcntl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: test_tzcnt_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %cmp = icmp ne i32 %a0, 0 %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false) ret i32 %cttz diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll index df180d0f0235ce..aa571531c0c6aa 100644 --- a/llvm/test/CodeGen/X86/bmi-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2-SLOW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR declare i64 @llvm.x86.bmi.bextr.64(i64, i64) @@ -11,6 +12,11 @@ define i64 @bextr64(i64 %x, i64 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: bextrq %rsi, %rdi, %rax ; CHECK-NEXT: retq +; +; EGPR-LABEL: bextr64: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y) ret i64 %tmp } @@ -28,6 +34,14 @@ define i64 @bextr64b(i64 %x) uwtable ssp { ; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 ; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax ; BEXTR-FAST-NEXT: retq +; +; EGPR-LABEL: bextr64b: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04] +; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00] +; EGPR-NEXT: # imm = 0xFFF +; EGPR-NEXT: retq # encoding: [0xc3] %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -40,6 +54,12 @@ define i64 @bextr64_subreg(i64 %x) uwtable ssp { ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movzbl %ah, %eax ; CHECK-NEXT: retq +; +; EGPR-LABEL: bextr64_subreg: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4] +; EGPR-NEXT: retq # encoding: [0xc3] %1 = lshr i64 %x, 8 %2 = and i64 %1, 255 ret i64 %2 @@ -58,6 +78,14 @@ define i64 @bextr64b_load(ptr %x) { ; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 ; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax ; BEXTR-FAST-NEXT: retq +; +; EGPR-LABEL: bextr64b_load: +; EGPR: # %bb.0: +; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04] +; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00] +; EGPR-NEXT: # imm = 0xFFF +; EGPR-NEXT: retq # encoding: [0xc3] %1 = load i64, ptr %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 @@ -71,6 +99,12 @@ define i64 @bextr64c(i64 %x, i32 %y) { ; CHECK-NEXT: # kill: def $esi killed $esi def $rsi ; CHECK-NEXT: bextrq %rsi, %rdi, %rax ; CHECK-NEXT: retq +; +; EGPR-LABEL: bextr64c: +; EGPR: # %bb.0: +; EGPR-NEXT: # kill: def $esi killed $esi def $rsi +; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp0 = sext i32 %y to i64 %tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 
%x, i64 %tmp0) ret i64 %tmp1 @@ -96,6 +130,13 @@ define i64 @bextr64d(i64 %a) { ; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102 ; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax ; BEXTR-FAST-NEXT: retq +; +; EGPR-LABEL: bextr64d: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00] +; EGPR-NEXT: bzhiq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0xc7] +; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i64 %a, 2 %and = and i64 %shr, 8589934591 @@ -123,6 +164,13 @@ define i64 @bextr64d_load(ptr %aptr) { ; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102 ; BEXTR-FAST-NEXT: bextrq %rax, (%rdi), %rax ; BEXTR-FAST-NEXT: retq +; +; EGPR-LABEL: bextr64d_load: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00] +; EGPR-NEXT: bzhiq %rax, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0x07] +; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %aptr, align 8 %shr = lshr i64 %a, 2 @@ -137,6 +185,14 @@ define i64 @non_bextr64(i64 %x) { ; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE ; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq +; +; EGPR-LABEL: non_bextr64: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: shrq $2, %rdi # encoding: [0x48,0xc1,0xef,0x02] +; EGPR-NEXT: movabsq $8589934590, %rax # encoding: [0x48,0xb8,0xfe,0xff,0xff,0xff,0x01,0x00,0x00,0x00] +; EGPR-NEXT: # imm = 0x1FFFFFFFE +; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i64 %x, 2 %and = and i64 %shr, 8589934590 diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index e4e33c99a6b88a..2683fab59ad1bc 100644 --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define i32 @andn32(i32 %x, i32 %y) { ; X86-LABEL: andn32: @@ -17,6 +18,11 @@ define i32 @andn32(i32 %x, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: andn32: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp1 = xor i32 %x, -1 %tmp2 = and i32 %y, %tmp1 ret i32 %tmp2 @@ -34,6 +40,11 @@ define i32 @andn32_load(i32 %x, ptr %y) { ; X64: # %bb.0: ; X64-NEXT: andnl (%rsi), %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: andn32_load: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0x06] +; EGPR-NEXT: retq # encoding: [0xc3] %y1 = load i32, ptr %y %tmp1 = xor i32 %x, -1 %tmp2 = and i32 %y1, %tmp1 @@ -53,6 +64,11 @@ define i64 @andn64(i64 %x, i64 %y) { ; X64: # %bb.0: ; X64-NEXT: andnq %rsi, %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: andn64: +; EGPR: # %bb.0: +; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6] +; EGPR-NEXT: retq # 
encoding: [0xc3] %tmp1 = xor i64 %x, -1 %tmp2 = and i64 %tmp1, %y ret i64 %tmp2 @@ -72,6 +88,13 @@ define i1 @andn_cmp(i32 %x, i32 %y) { ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: andn_cmp: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %notx = xor i32 %x, -1 %and = and i32 %notx, %y %cmp = icmp eq i32 %and, 0 @@ -92,6 +115,13 @@ define i1 @and_cmp1(i32 %x, i32 %y) { ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp1: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %x, %y %cmp = icmp eq i32 %and, %y ret i1 %cmp @@ -110,6 +140,13 @@ define i1 @and_cmp2(i32 %x, i32 %y) { ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: setne %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp2: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %y, %x %cmp = icmp ne i32 %and, %y ret i1 %cmp @@ -128,6 +165,13 @@ define i1 @and_cmp3(i32 %x, i32 %y) { ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp3: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %x, %y %cmp = icmp eq i32 %y, %and ret i1 %cmp @@ -146,6 +190,13 @@ define i1 @and_cmp4(i32 %x, i32 %y) { ; X64-NEXT: andnl %esi, %edi, %eax ; X64-NEXT: setne %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp4: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %y, %x %cmp = icmp ne i32 %y, %and ret i1 %cmp @@ -168,6 +219,13 @@ define i1 @and_cmp_const(i32 %x) { ; X64-NEXT: testb $43, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp_const: +; EGPR: # %bb.0: +; EGPR-NEXT: notl %edi # encoding: [0xf7,0xd7] +; EGPR-NEXT: testb $43, %dil # encoding: [0x40,0xf6,0xc7,0x2b] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %x, 43 %cmp = icmp eq i32 %and, 43 ret i1 %cmp @@ -188,6 +246,12 @@ define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) { ; X64-NEXT: btl %esi, %edi ; X64-NEXT: setae %al ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp_const_power_of_two: +; EGPR: # %bb.0: +; EGPR-NEXT: btl %esi, %edi # encoding: [0x0f,0xa3,0xf7] +; EGPR-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %shl = shl i32 1, %y %and = and i32 %x, %shl %cmp = icmp ne i32 %and, %shl @@ -214,6 +278,15 @@ define i32 @and_cmp_not_one_use(i32 %x) { ; X64-NEXT: sete %al ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: and_cmp_not_one_use: 
+; EGPR: # %bb.0: +; EGPR-NEXT: andl $37, %edi # encoding: [0x83,0xe7,0x25] +; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; EGPR-NEXT: cmpl $37, %edi # encoding: [0x83,0xff,0x25] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %x, 37 %cmp = icmp eq i32 %and, 37 %ext = zext i1 %cmp to i32 @@ -238,6 +311,13 @@ define i1 @not_an_andn1(i32 %x, i32 %y) { ; X64-NEXT: cmpl %edi, %esi ; X64-NEXT: setg %al ; X64-NEXT: retq +; +; EGPR-LABEL: not_an_andn1: +; EGPR: # %bb.0: +; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7] +; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe] +; EGPR-NEXT: setg %al # encoding: [0x0f,0x9f,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %x, %y %cmp = icmp sgt i32 %y, %and ret i1 %cmp @@ -259,6 +339,13 @@ define i1 @not_an_andn2(i32 %x, i32 %y) { ; X64-NEXT: cmpl %edi, %esi ; X64-NEXT: setbe %al ; X64-NEXT: retq +; +; EGPR-LABEL: not_an_andn2: +; EGPR: # %bb.0: +; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7] +; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe] +; EGPR-NEXT: setbe %al # encoding: [0x0f,0x96,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %and = and i32 %y, %x %cmp = icmp ule i32 %y, %and ret i1 %cmp @@ -281,6 +368,13 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) { ; X64-NEXT: andnq %rsi, %rdi, %rax ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: andn_cmp_swap_ops: +; EGPR: # %bb.0: +; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6] +; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %notx = xor i64 %x, -1 %and = and i64 %y, %notx %cmp = icmp eq i64 %and, 0 @@ -303,6 +397,13 @@ define i1 @andn_cmp_i8(i8 %x, i8 %y) { ; X64-NEXT: testb %sil, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq +; +; EGPR-LABEL: andn_cmp_i8: +; EGPR: # %bb.0: +; EGPR-NEXT: notb %sil # encoding: [0x40,0xf6,0xd6] +; EGPR-NEXT: testb %sil, %dil # encoding: [0x40,0x84,0xf7] +; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %noty = xor i8 %y, -1 %and = and i8 %x, %noty %cmp = icmp eq i8 %and, 0 @@ -323,6 +424,13 @@ define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) { ; X64-NEXT: andnl %edi, %esi, %eax ; X64-NEXT: setle %al ; X64-NEXT: retq +; +; EGPR-LABEL: andn_cmp_i32_overflow: +; EGPR: # %bb.0: +; EGPR-NEXT: andnl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf2,0xc7] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0] +; EGPR-NEXT: retq # encoding: [0xc3] %noty = xor i32 %y, -1 %and = and i32 %x, %noty %cmp = icmp slt i32 %and, 1 @@ -342,6 +450,11 @@ define i32 @bextr32(i32 %x, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: bextr32: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y) ret i32 %tmp } @@ -358,6 +471,11 @@ define i32 @bextr32_load(ptr %x, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, (%rdi), %eax ; X64-NEXT: retq +; +; EGPR-LABEL: bextr32_load: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] %x1 = load i32, ptr %x %tmp = tail call i32 
@llvm.x86.bmi.bextr.32(i32 %x1, i32 %y) ret i32 %tmp @@ -389,6 +507,13 @@ define i32 @bextr32b(i32 %x) uwtable ssp { ; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 ; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax ; X64-FAST-BEXTR-NEXT: retq +; +; EGPR-LABEL: bextr32b: +; EGPR: # %bb.0: +; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00] +; EGPR-NEXT: # imm = 0xC04 +; EGPR-NEXT: bextrl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %1 = lshr i32 %x, 4 %2 = and i32 %1, 4095 ret i32 %2 @@ -406,6 +531,12 @@ define i32 @bextr32_subreg(i32 %x) uwtable ssp { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movzbl %ah, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: bextr32_subreg: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4] +; EGPR-NEXT: retq # encoding: [0xc3] %1 = lshr i32 %x, 8 %2 = and i32 %1, 255 ret i32 %2 @@ -439,6 +570,13 @@ define i32 @bextr32b_load(ptr %x) uwtable ssp { ; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 ; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax ; X64-FAST-BEXTR-NEXT: retq +; +; EGPR-LABEL: bextr32b_load: +; EGPR: # %bb.0: +; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00] +; EGPR-NEXT: # imm = 0xC04 +; EGPR-NEXT: bextrl %eax, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] %1 = load i32, ptr %x %2 = lshr i32 %1, 4 %3 = and i32 %2, 4095 @@ -457,6 +595,11 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) { ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: bextr32c: +; EGPR: # %bb.0: +; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp0 = sext i16 %y to i32 %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0) ret i32 %tmp1 @@ -476,6 +619,13 @@ define i32 @non_bextr32(i32 %x) { ; X64-NEXT: shrl $2, %eax ; X64-NEXT: andl $111, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: non_bextr32: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: shrl $2, %eax # encoding: [0xc1,0xe8,0x02] +; EGPR-NEXT: andl $111, %eax # encoding: [0x83,0xe0,0x6f] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i32 %x, 2 %and = and i32 %shr, 111 @@ -492,6 +642,11 @@ define i32 @blsi32(i32 %x) { ; X64: # %bb.0: ; X64-NEXT: blsil %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi32: +; EGPR: # %bb.0: +; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = sub i32 0, %x %tmp2 = and i32 %x, %tmp ret i32 %tmp2 @@ -508,6 +663,11 @@ define i32 @blsi32_load(ptr %x) { ; X64: # %bb.0: ; X64-NEXT: blsil (%rdi), %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi32_load: +; EGPR: # %bb.0: +; EGPR-NEXT: blsil (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x1f] +; EGPR-NEXT: retq # encoding: [0xc3] %x1 = load i32, ptr %x %tmp = sub i32 0, %x1 %tmp2 = and i32 %x1, %tmp @@ -529,6 +689,13 @@ define i32 @blsi32_z(i32 %a, i32 %b) nounwind { ; X64-NEXT: blsil %edi, %eax ; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi32_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6] +; EGPR-NEXT: retq # 
encoding: [0xc3] %t0 = sub i32 0, %a %t1 = and i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -552,6 +719,14 @@ define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsil %edi, %ecx ; X64-NEXT: cmovnel %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi32_z2: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf] +; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 0, %a %t1 = and i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -577,6 +752,14 @@ define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsil %edi, %ecx ; X64-NEXT: cmovgl %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi32_sle: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf] +; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 0, %a %t1 = and i32 %t0, %a %t2 = icmp sle i32 %t1, 0 @@ -606,6 +789,11 @@ define i64 @blsi64(i64 %x) { ; X64: # %bb.0: ; X64-NEXT: blsiq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi64: +; EGPR: # %bb.0: +; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = sub i64 0, %x %tmp2 = and i64 %tmp, %x ret i64 %tmp2 @@ -638,6 +826,13 @@ define i64 @blsi64_z(i64 %a, i64 %b) nounwind { ; X64-NEXT: blsiq %rdi, %rax ; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi64_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf] +; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 0, %a %t1 = and i64 %t0, %a %t2 = icmp eq i64 %t1, 0 @@ -672,6 +867,14 @@ define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; X64-NEXT: blsiq %rdi, %rcx ; X64-NEXT: cmovneq %rdx, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi64_z2: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] +; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf] +; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9] +; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 0, %a %t1 = and i64 %t0, %a %t2 = icmp eq i64 %t1, 0 @@ -707,6 +910,14 @@ define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind { ; X64-NEXT: blsiq %rdi, %rcx ; X64-NEXT: cmovgq %rdx, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsi64_sle: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] +; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf] +; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9] +; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 0, %a %t1 = and i64 %t0, %a %t2 = icmp sle i64 %t1, 0 @@ -724,6 +935,11 @@ define i32 @blsmsk32(i32 %x) { ; X64: # %bb.0: ; X64-NEXT: blsmskl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk32: +; EGPR: # %bb.0: +; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7] +; EGPR-NEXT: retq # 
encoding: [0xc3] %tmp = sub i32 %x, 1 %tmp2 = xor i32 %x, %tmp ret i32 %tmp2 @@ -740,6 +956,11 @@ define i32 @blsmsk32_load(ptr %x) { ; X64: # %bb.0: ; X64-NEXT: blsmskl (%rdi), %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk32_load: +; EGPR: # %bb.0: +; EGPR-NEXT: blsmskl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x17] +; EGPR-NEXT: retq # encoding: [0xc3] %x1 = load i32, ptr %x %tmp = sub i32 %x1, 1 %tmp2 = xor i32 %x1, %tmp @@ -761,6 +982,13 @@ define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind { ; X64-NEXT: blsmskl %edi, %eax ; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk32_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = xor i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -784,6 +1012,13 @@ define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsmskl %edi, %ecx ; X64-NEXT: cmovnel %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk32_z2: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7] +; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = xor i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -807,6 +1042,14 @@ define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsmskl %edi, %ecx ; X64-NEXT: cmovgl %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk32_sle: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7] +; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = xor i32 %t0, %a %t2 = icmp sle i32 %t1, 0 @@ -836,6 +1079,11 @@ define i64 @blsmsk64(i64 %x) { ; X64: # %bb.0: ; X64-NEXT: blsmskq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk64: +; EGPR: # %bb.0: +; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = sub i64 %x, 1 %tmp2 = xor i64 %tmp, %x ret i64 %tmp2 @@ -868,6 +1116,13 @@ define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind { ; X64-NEXT: blsmskq %rdi, %rax ; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk64_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7] +; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 %a, 1 %t1 = xor i64 %t0, %a %t2 = icmp eq i64 %t1, 0 @@ -902,6 +1157,13 @@ define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; X64-NEXT: blsmskq %rdi, %rcx ; X64-NEXT: cmovneq %rdx, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk64_z2: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] +; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7] +; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 %a, 1 %t1 = xor i64 %t0, %a %t2 = icmp eq i64 %t1, 0 @@ -937,6 +1199,14 @@ define i64 @blsmsk64_sle(i64 %a, 
i64 %b, i64 %c) nounwind { ; X64-NEXT: blsmskq %rdi, %rcx ; X64-NEXT: cmovgq %rdx, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsmsk64_sle: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] +; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7] +; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9] +; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i64 %a, 1 %t1 = xor i64 %t0, %a %t2 = icmp sle i64 %t1, 0 @@ -954,6 +1224,11 @@ define i32 @blsr32(i32 %x) { ; X64: # %bb.0: ; X64-NEXT: blsrl %edi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr32: +; EGPR: # %bb.0: +; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = sub i32 %x, 1 %tmp2 = and i32 %x, %tmp ret i32 %tmp2 @@ -970,6 +1245,11 @@ define i32 @blsr32_load(ptr %x) { ; X64: # %bb.0: ; X64-NEXT: blsrl (%rdi), %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr32_load: +; EGPR: # %bb.0: +; EGPR-NEXT: blsrl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x0f] +; EGPR-NEXT: retq # encoding: [0xc3] %x1 = load i32, ptr %x %tmp = sub i32 %x1, 1 %tmp2 = and i32 %x1, %tmp @@ -991,6 +1271,13 @@ define i32 @blsr32_z(i32 %a, i32 %b) nounwind { ; X64-NEXT: blsrl %edi, %eax ; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr32_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf] +; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = and i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -1014,6 +1301,14 @@ define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsrl %edi, %ecx ; X64-NEXT: cmovnel %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr32_z2: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf] +; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = and i32 %t0, %a %t2 = icmp eq i32 %t1, 0 @@ -1037,6 +1332,14 @@ define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind { ; X64-NEXT: blsrl %edi, %ecx ; X64-NEXT: cmovgl %edx, %eax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr32_sle: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf] +; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] %t0 = sub i32 %a, 1 %t1 = and i32 %t0, %a %t2 = icmp sle i32 %t1, 0 @@ -1066,6 +1369,11 @@ define i64 @blsr64(i64 %x) { ; X64: # %bb.0: ; X64-NEXT: blsrq %rdi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr64: +; EGPR: # %bb.0: +; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf] +; EGPR-NEXT: retq # encoding: [0xc3] %tmp = sub i64 %x, 1 %tmp2 = and i64 %tmp, %x ret i64 %tmp2 @@ -1098,6 +1406,13 @@ define i64 @blsr64_z(i64 %a, i64 %b) nounwind { ; X64-NEXT: blsrq %rdi, %rax ; X64-NEXT: cmoveq %rsi, %rax ; X64-NEXT: retq +; +; EGPR-LABEL: blsr64_z: +; EGPR: # %bb.0: +; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %t0 = sub i64 %a, 1
 %t1 = and i64 %t0, %a
 %t2 = icmp eq i64 %t1, 0
@@ -1132,6 +1447,14 @@ define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; X64-NEXT: blsrq %rdi, %rcx
 ; X64-NEXT: cmovneq %rdx, %rax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %t0 = sub i64 %a, 1
 %t1 = and i64 %t0, %a
 %t2 = icmp eq i64 %t1, 0
@@ -1167,6 +1490,14 @@ define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
 ; X64-NEXT: blsrq %rdi, %rcx
 ; X64-NEXT: cmovgq %rdx, %rax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %t0 = sub i64 %a, 1
 %t1 = and i64 %t0, %a
 %t2 = icmp sle i64 %t1, 0
@@ -1189,6 +1520,12 @@ define i64 @blsr_disguised_constant(i64 %x) {
 ; X64-NEXT: blsrl %edi, %eax
 ; X64-NEXT: movzwl %ax, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_constant:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %a1 = and i64 %x, 65535
 %a2 = add i64 %x, 65535
 %r = and i64 %a1, %a2
@@ -1211,6 +1548,12 @@ define i64 @blsr_disguised_shrunk_add(i64 %x) {
 ; X64-NEXT: shrq $48, %rdi
 ; X64-NEXT: blsrl %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_shrunk_add:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrq $48, %rdi # encoding: [0x48,0xc1,0xef,0x30]
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %a = lshr i64 %x, 48
 %b = add i64 %a, -1
 %c = and i64 %b, %a
@@ -1234,6 +1577,16 @@ define void @pr40060(i32, i32) {
 ; X64-NEXT: jns bar # TAILCALL
 ; X64-NEXT: # %bb.1:
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pr40060:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: jns bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x79,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
 %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
 %4 = icmp sgt i32 %3, -1
 br i1 %4, label %5, label %6
@@ -1274,6 +1627,23 @@ define i32 @blsr32_branch(i32 %x) {
 ; X64-NEXT: popq %rbx
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrl %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB53_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB53_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB53_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i32 %x, 1
 %tmp2 = and i32 %x, %tmp
 %cmp = icmp eq i32 %tmp2, 0
@@ -1329,6 +1699,23 @@ define i64 @blsr64_branch(i64 %x) {
 ; X64-NEXT: popq %rbx
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB54_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB54_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB54_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i64 %x, 1
 %tmp2 = and i64 %x, %tmp
 %cmp = icmp eq i64 %tmp2, 0
@@ -1369,6 +1756,23 @@ define i32 @blsi32_branch(i32 %x) {
 ; X64-NEXT: popq %rbx
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsil %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB55_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB55_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB55_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i32 0, %x
 %tmp2 = and i32 %x, %tmp
 %cmp = icmp eq i32 %tmp2, 0
@@ -1424,6 +1828,23 @@ define i64 @blsi64_branch(i64 %x) {
 ; X64-NEXT: popq %rbx
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsiq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB56_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB56_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB56_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i64 0, %x
 %tmp2 = and i64 %x, %tmp
 %cmp = icmp eq i64 %tmp2, 0
@@ -1450,6 +1871,16 @@ define void @pr42118_i32(i32 %x) {
 ; X64-NEXT: je bar # TAILCALL
 ; X64-NEXT: # %bb.1:
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i32 0, %x
 %tmp1 = and i32 %tmp, %x
 %cmp = icmp eq i32 %tmp1, %x
@@ -1493,6 +1924,16 @@ define void @pr42118_i64(i64 %x) {
 ; X64-NEXT: je bar # TAILCALL
 ; X64-NEXT: # %bb.1:
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sub i64 0, %x
 %tmp1 = and i64 %tmp, %x
 %cmp = icmp eq i64 %tmp1, %x
@@ -1522,6 +1963,13 @@ define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
 ; X64-NEXT: blsil %edi, %eax
 ; X64-NEXT: cmovael %esi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tobool = icmp eq i32 %x, 0
 %sub = sub nsw i32 0, %x
 %and = and i32 %sub, %x
@@ -1560,6 +2008,13 @@ define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
 ; X64-NEXT: blsiq %rdi, %rax
 ; X64-NEXT: cmovaeq %rsi, %rax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tobool = icmp eq i64 %x, 0
 %sub = sub nsw i64 0, %x
 %and = and i64 %sub, %x
diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
index 214c4eeb1f4c59..fa1c67986e11f2 100644
--- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
@@ -1,11 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefixes=EGPR

 define i64 @bzhi64(i64 %x, i64 %y) {
 ; CHECK-LABEL: bzhi64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
 ret i64 %tmp
 }
@@ -15,6 +21,11 @@ define i64 @bzhi64_load(ptr %x, i64 %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: bzhiq %rsi, (%rdi), %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = load i64, ptr %x
 %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x1, i64 %y)
 ret i64 %tmp
@@ -27,6 +38,11 @@ define i64 @pdep64(i64 %x, i64 %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pdepq %rsi, %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
 ret i64 %tmp
 }
@@ -36,6 +52,11 @@ define i64 @pdep64_load(i64 %x, ptr %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pdepq (%rsi), %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = load i64, ptr %y
 %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y1)
 ret i64 %tmp
@@ -48,6 +69,14 @@ define i64 @pdep64_anyext(i32 %x) {
 ; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
 ; CHECK-NEXT: pdepq %rax, %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pdepq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = sext i32 %x to i64
 %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x1, i64 6148914691236517205)
 ret i64 %tmp
@@ -60,6 +89,11 @@ define i64 @pext64(i64 %x, i64 %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pextq %rsi, %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
 ret i64 %tmp
 }
@@ -69,6 +103,11 @@ define i64 @pext64_load(i64 %x, ptr %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pextq (%rsi), %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = load i64, ptr %y
 %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y1)
 ret i64 %tmp
@@ -80,6 +119,13 @@ define i64 @pext64_knownbits(i64 %x, i64 %y) {
 ; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
 ; CHECK-NEXT: pextq %rax, %rdi, %rax
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pextq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 6148914691236517205)
 %tmp2 = and i64 %tmp, 4294967295
 ret i64 %tmp2
@@ -95,6 +141,14 @@ define i64 @mulx64(i64 %x, i64 %y, ptr %p) {
 ; CHECK-NEXT: mulxq %rsi, %rax, %rdx
 ; CHECK-NEXT: movq %rdx, (%rcx)
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq %rsi, %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0xd6]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = zext i64 %x to i128
 %y1 = zext i64 %y to i128
 %r1 = mul i128 %x1, %y1
@@ -113,6 +167,14 @@ define i64 @mulx64_load(i64 %x, ptr %y, ptr %p) {
 ; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
 ; CHECK-NEXT: movq %rdx, (%rcx)
 ; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq (%rsi), %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0x16]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = load i64, ptr %y
 %x2 = zext i64 %x to i128
 %y2 = zext i64 %y1 to i128
diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll
index 24e38cfeb704df..cabeebb0c3f366 100644
--- a/llvm/test/CodeGen/X86/bmi2.ll
+++ b/llvm/test/CodeGen/X86/bmi2.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR

 define i32 @bzhi32(i32 %x, i32 %y) {
 ; X86-LABEL: bzhi32:
@@ -16,6 +17,12 @@ define i32 @bzhi32(i32 %x, i32 %y) {
 ; X64-NEXT: addl %edi, %edi
 ; X64-NEXT: bzhil %esi, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = add i32 %x, %x
 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
 ret i32 %tmp
@@ -33,6 +40,11 @@ define i32 @bzhi32_load(ptr %x, i32 %y) {
 ; X64: # %bb.0:
 ; X64-NEXT: bzhil %esi, (%rdi), %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = load i32, ptr %x
 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
 ret i32 %tmp
@@ -52,6 +64,13 @@ define i1 @bzhi32_overflow(i32 %x, i32 %y) {
 ; X64-NEXT: bzhil %esi, %edi, %eax
 ; X64-NEXT: setle %al
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_overflow:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
 %cmp = icmp slt i32 %tmp, 1
 ret i1 %cmp
@@ -73,6 +92,12 @@ define i32 @pdep32(i32 %x, i32 %y) {
 ; X64-NEXT: addl %esi, %esi
 ; X64-NEXT: pdepl %esi, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = add i32 %y, %y
 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
 ret i32 %tmp
@@ -90,6 +115,11 @@ define i32 @pdep32_load(i32 %x, ptr %y) {
 ; X64: # %bb.0:
 ; X64-NEXT: pdepl (%rsi), %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = load i32, ptr %y
 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
 ret i32 %tmp
@@ -108,6 +138,13 @@ define i32 @pdep32_anyext(i16 %x) {
 ; X64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; X64-NEXT: pdepl %eax, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $-1431655766, %eax # encoding: [0xb8,0xaa,0xaa,0xaa,0xaa]
+; EGPR-NEXT: # imm = 0xAAAAAAAA
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = sext i16 %x to i32
 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
 ret i32 %tmp
@@ -126,6 +163,13 @@ define i32 @pdep32_demandedbits(i32 %x) {
 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
 ; X64-NEXT: pdepl %eax, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
 %tmp2 = and i32 %tmp, 1431655765
 ret i32 %tmp2
@@ -144,6 +188,12 @@ define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
 ; X64-NEXT: pdepl %esi, %edi, %eax
 ; X64-NEXT: andl $128, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $128, %eax # encoding: [0x25,0x80,0x00,0x00,0x00]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = and i32 %x, 255
 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
 %tmp3 = and i32 %tmp2, 128
@@ -164,6 +214,13 @@ define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
 ; X64-NEXT: pdepl %esi, %edi, %eax
 ; X64-NEXT: andl $32768, %eax # imm = 0x8000
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; EGPR-NEXT: # imm = 0x8000
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sext i16 %y to i32
 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
 %tmp3 = and i32 %tmp2, 32768
@@ -184,6 +241,12 @@ define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
 ; X64-NEXT: pdepl %esi, %edi, %eax
 ; X64-NEXT: movzwl %ax, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = sext i16 %y to i32
 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
 %tmp3 = and i32 %tmp2, 65535
@@ -205,6 +268,14 @@ define i32 @pdep32_knownbits(i32 %x) {
 ; X64-NEXT: pdepl %eax, %edi, %eax
 ; X64-NEXT: imull %eax, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
 %tmp2 = and i32 %tmp, 1431655765
 %tmp3 = mul i32 %tmp, %tmp2
@@ -226,6 +297,13 @@ define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
 ; X64-NEXT: pdepl %esi, %edi, %eax
 ; X64-NEXT: imull %eax, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = and i32 %x, -256
 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
 %tmp3 = and i32 %tmp2, -256
@@ -249,6 +327,12 @@ define i32 @pext32(i32 %x, i32 %y) {
 ; X64-NEXT: addl %esi, %esi
 ; X64-NEXT: pextl %esi, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pextl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = add i32 %y, %y
 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
 ret i32 %tmp
@@ -266,6 +350,11 @@ define i32 @pext32_load(i32 %x, ptr %y) {
 ; X64: # %bb.0:
 ; X64-NEXT: pextl (%rsi), %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %y1 = load i32, ptr %y
 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
 ret i32 %tmp
@@ -284,6 +373,13 @@ define i32 @pext32_knownbits(i32 %x) {
 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
 ; X64-NEXT: pextl %eax, %edi, %eax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pextl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
 %tmp2 = and i32 %tmp, 65535
 ret i32 %tmp2
@@ -315,6 +411,19 @@ define i32 @mulx32(i32 %x, i32 %y, ptr %p) {
 ; X64-NEXT: movl %ecx, (%rdx)
 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: leal (%rsi,%rsi), %eax # encoding: [0x8d,0x04,0x36]
+; EGPR-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = add i32 %x, %x
 %y1 = add i32 %y, %y
 %x2 = zext i32 %x1 to i64
@@ -349,6 +458,18 @@ define i32 @mulx32_load(i32 %x, ptr %y, ptr %p) {
 ; X64-NEXT: movl %ecx, (%rdx)
 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
+; EGPR-NEXT: movl (%rsi), %ecx # encoding: [0x8b,0x0e]
+; EGPR-NEXT: imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x1 = add i32 %x, %x
 %y1 = load i32, ptr %y
 %x2 = zext i32 %x1 to i64
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index db00e1c49dca7b..bb0213891c9768 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 -mattr=+egpr --show-mc-encoding < %s | FileCheck --check-prefix=EGPR %s

 define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: shl32:
@@ -13,6 +14,11 @@ define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxl %esi, %edi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = shl i32 %x, %shamt
 ret i32 %shl
 }
@@ -29,6 +35,12 @@ define i32 @shl32i(i32 %x) nounwind uwtable readnone {
 ; BMI264-NEXT: movl %edi, %eax
 ; BMI264-NEXT: shll $5, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = shl i32 %x, 5
 ret i32 %shl
 }
@@ -45,6 +57,11 @@ define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxl %esi, (%rdi), %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i32, ptr %p
 %shl = shl i32 %x, %shamt
 ret i32 %shl
@@ -63,6 +80,12 @@ define i32 @shl32pi(ptr %p) nounwind uwtable readnone {
 ; BMI264-NEXT: movl (%rdi), %eax
 ; BMI264-NEXT: shll $5, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i32, ptr %p
 %shl = shl i32 %x, 5
 ret i32 %shl
@@ -91,6 +114,11 @@ define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxq %rsi, %rdi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = shl i64 %x, %shamt
 ret i64 %shl
 }
@@ -109,6 +137,12 @@ define i64 @shl64i(i64 %x) nounwind uwtable readnone {
 ; BMI264-NEXT: movq %rdi, %rax
 ; BMI264-NEXT: shlq $7, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = shl i64 %x, 7
 ret i64 %shl
 }
@@ -137,6 +171,11 @@ define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i64, ptr %p
 %shl = shl i64 %x, %shamt
 ret i64 %shl
@@ -157,6 +196,12 @@ define i64 @shl64pi(ptr %p) nounwind uwtable readnone {
 ; BMI264-NEXT: movq (%rdi), %rax
 ; BMI264-NEXT: shlq $7, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i64, ptr %p
 %shl = shl i64 %x, 7
 ret i64 %shl
@@ -173,6 +218,11 @@ define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxl %esi, %edi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = lshr i32 %x, %shamt
 ret i32 %shl
 }
@@ -189,6 +239,11 @@ define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxl %esi, (%rdi), %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i32, ptr %p
 %shl = lshr i32 %x, %shamt
 ret i32 %shl
@@ -217,6 +272,11 @@ define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxq %rsi, %rdi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = lshr i64 %x, %shamt
 ret i64 %shl
 }
@@ -245,6 +305,11 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i64, ptr %p
 %shl = lshr i64 %x, %shamt
 ret i64 %shl
@@ -261,6 +326,11 @@ define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxl %esi, %edi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = ashr i32 %x, %shamt
 ret i32 %shl
 }
@@ -277,6 +347,11 @@ define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxl %esi, (%rdi), %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i32, ptr %p
 %shl = ashr i32 %x, %shamt
 ret i32 %shl
@@ -305,6 +380,11 @@ define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxq %rsi, %rdi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shl = ashr i64 %x, %shamt
 ret i64 %shl
 }
@@ -333,6 +413,11 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %x = load i64, ptr %p
 %shl = ashr i64 %x, %shamt
 ret i64 %shl
@@ -349,6 +434,11 @@ define i32 @shl32and(i32 %t, i32 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxl %edi, %esi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x41,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i32 %t, 31
 %res = shl i32 %val, %shamt
 ret i32 %res
@@ -374,6 +464,11 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shlxq %rdi, %rsi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc1,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i64 %t, 63
 %res = shl i64 %val, %shamt
 ret i64 %res
@@ -390,6 +485,11 @@ define i32 @lshr32and(i32 %t, i32 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxl %edi, %esi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i32 %t, 31
 %res = lshr i32 %val, %shamt
 ret i32 %res
@@ -415,6 +515,11 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: shrxq %rdi, %rsi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i64 %t, 63
 %res = lshr i64 %val, %shamt
 ret i64 %res
@@ -431,6 +536,11 @@ define i32 @ashr32and(i32 %t, i32 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxl %edi, %esi, %eax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i32 %t, 31
 %res = ashr i32 %val, %shamt
 ret i32 %res
@@ -456,6 +566,11 @@ define i64 @ashr64and(i64 %t, i64 %val) nounwind {
 ; BMI264: # %bb.0:
 ; BMI264-NEXT: sarxq %rdi, %rsi, %rax
 ; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
 %shamt = and i64 %t, 63
 %res = ashr i64 %val, %shamt
 ret i64 %res
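
Note on exercising the new selection paths by hand: every EGPR RUN line above passes --show-mc-encoding so FileCheck can distinguish the VEX and EVEX forms by their bytes. A minimal standalone test in the same style would be the following sketch (the function name bzhi64_sketch is illustrative, not part of the patch; the encoding bytes are copied from the bzhi64 case above, which uses the same registers):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s
;
; When no extended GPR is actually referenced, the EVEX variant selected
; under HasEGPR is compressed back to the shorter VEX form, so the bytes
; below match the legacy BMI2 output.
define i64 @bzhi64_sketch(i64 %x, i64 %y) {
; CHECK-LABEL: bzhi64_sketch:
; CHECK: bzhiq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0xc7]
  %r = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
  ret i64 %r
}

declare i64 @llvm.x86.bmi.bzhi.64(i64, i64)

Assertions in this style can be regenerated with utils/update_llc_test_checks.py, per the NOTE line at the top of each test file.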