Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// The hardware supports 32-bit FSHR, but not FSHL.
setOperationAction(ISD::FSHR, MVT::i32, Legal);

-// The hardware supports 32-bit ROTR, but not ROTL.
-setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
-setOperationAction(ISD::ROTR, MVT::i64, Expand);
+setOperationAction({ISD::ROTL, ISD::ROTR}, {MVT::i32, MVT::i64}, Expand);

setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);

Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -806,12 +806,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
(vt rc:$addr)
>;

-// rotr pattern
-class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
-(rotr i32:$src0, i32:$src1),
-(BIT_ALIGN $src0, $src0, $src1)
->;
-
// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/EvergreenInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,6 @@ def : AMDGPUPat <
(fshr i32:$src0, i32:$src1, i32:$src2),
(BIT_ALIGN_INT_eg $src0, $src1, $src2)
>;
-def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def FMA_eg : FMA_Common<0x7>;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14032,6 +14032,12 @@ static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
assert(OtherOp.getValueSizeInBits() == 32);
}

+// Check that we haven't just recreated the same FSHR node.
+if (N->getOpcode() == ISD::FSHR &&
+(N->getOperand(0) == Op || N->getOperand(0) == OtherOp) &&
+(N->getOperand(1) == Op || N->getOperand(1) == OtherOp))
+return SDValue();
+
if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {

assert(Op.getValueType().isByteSized() &&
Expand Down
27 changes: 0 additions & 27 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2685,8 +2685,6 @@ def : AMDGPUPat <

let True16Predicate = NotHasTrue16BitInsts in {
let SubtargetPredicate = isNotGFX9Plus in {
-def : ROTRPattern <V_ALIGNBIT_B32_e64>;
-
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
Expand All @@ -2697,14 +2695,6 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
} // isNotGFX9Plus

let SubtargetPredicate = isGFX9GFX10 in {
-def : GCNPat <
-(rotr i32:$src0, i32:$src1),
-(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
-/* src1_modifiers */ 0, $src0,
-/* src2_modifiers */ 0,
-$src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
foreach pat = [(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
def : GCNPat<pat,
Expand All @@ -2726,15 +2716,6 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
} // end True16Predicate = NotHasTrue16BitInsts

let True16Predicate = UseRealTrue16Insts in {
-def : GCNPat <
-(rotr i32:$src0, i32:$src1),
-(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
-/* src1_modifiers */ 0, $src0,
-/* src2_modifiers */ 0,
-(EXTRACT_SUBREG $src1, lo16),
-/* clamp */ 0, /* op_sel */ 0)
->;
-
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
(V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
Expand All @@ -2753,14 +2734,6 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
} // end True16Predicate = UseRealTrue16Insts

let True16Predicate = UseFakeTrue16Insts in {
-def : GCNPat <
-(rotr i32:$src0, i32:$src1),
-(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
-/* src1_modifiers */ 0, $src0,
-/* src2_modifiers */ 0,
-$src1, /* clamp */ 0, /* op_sel */ 0)
->;
-
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
Expand Down
38 changes: 16 additions & 22 deletions llvm/test/CodeGen/AMDGPU/packetizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,37 @@
define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
; R600-LABEL: test:
; R600: ; %bb.0: ; %entry
-; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
 ; R600-NEXT: CF_END
 ; R600-NEXT: PAD
 ; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: ADD_INT T0.Y, KC0[3].X, 1,
-; R600-NEXT: ADD_INT T0.Z, KC0[3].Y, 1,
-; R600-NEXT: ADD_INT T0.W, KC0[2].Z, 1,
-; R600-NEXT: ADD_INT * T1.W, KC0[2].W, 1,
-; R600-NEXT: BIT_ALIGN_INT T0.X, PS, PS, KC0[3].Z,
-; R600-NEXT: BIT_ALIGN_INT T1.Y, PV.W, PV.W, KC0[3].Z,
-; R600-NEXT: BIT_ALIGN_INT T0.Z, PV.Z, PV.Z, KC0[3].Z,
-; R600-NEXT: BIT_ALIGN_INT * T0.W, PV.Y, PV.Y, KC0[3].Z,
-; R600-NEXT: OR_INT T0.W, PV.W, PV.Z,
-; R600-NEXT: OR_INT * T1.W, PV.Y, PV.X,
-; R600-NEXT: OR_INT T0.X, PS, PV.W,
+; R600-NEXT: ADD_INT T0.Y, KC0[2].W, 1,
+; R600-NEXT: ADD_INT T0.Z, KC0[2].Z, 1,
+; R600-NEXT: ADD_INT T0.W, KC0[3].Y, 1,
+; R600-NEXT: ADD_INT * T1.W, KC0[3].X, 1,
+; R600-NEXT: OR_INT T0.W, PS, PV.W,
+; R600-NEXT: OR_INT * T1.W, PV.Z, PV.Y,
+; R600-NEXT: OR_INT * T0.W, PS, PV.W,
+; R600-NEXT: BIT_ALIGN_INT T0.X, PV.W, PV.W, KC0[3].Z,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: test:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
 ; CM-NEXT: CF_END
 ; CM-NEXT: PAD
 ; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: ADD_INT T0.X, KC0[3].X, 1,
-; CM-NEXT: ADD_INT T0.Y, KC0[3].Y, 1,
-; CM-NEXT: ADD_INT T0.Z, KC0[2].Z, 1,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].W, 1,
-; CM-NEXT: BIT_ALIGN_INT T1.X, PV.W, PV.W, KC0[3].Z,
-; CM-NEXT: BIT_ALIGN_INT T1.Y, PV.Z, PV.Z, KC0[3].Z,
-; CM-NEXT: BIT_ALIGN_INT T0.Z, PV.Y, PV.Y, KC0[3].Z,
-; CM-NEXT: BIT_ALIGN_INT * T0.W, PV.X, PV.X, KC0[3].Z,
+; CM-NEXT: ADD_INT T0.X, KC0[2].W, 1,
+; CM-NEXT: ADD_INT T0.Y, KC0[2].Z, 1,
+; CM-NEXT: ADD_INT T0.Z, KC0[3].Y, 1,
+; CM-NEXT: ADD_INT * T0.W, KC0[3].X, 1,
 ; CM-NEXT: OR_INT T0.Z, PV.W, PV.Z,
 ; CM-NEXT: OR_INT * T0.W, PV.Y, PV.X,
-; CM-NEXT: OR_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: BIT_ALIGN_INT * T0.X, PV.W, PV.W, KC0[3].Z,
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/permute.ll
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,13 @@ define amdgpu_kernel void @lsh8_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %a
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GCN-NEXT: v_mov_b32_e32 v3, 0x2010007
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v1, s1
 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s0, v0
 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT: flat_load_dword v2, v[0:1]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_perm_b32 v2, s2, v2, v3
+; GCN-NEXT: v_alignbit_b32 v2, v2, s2, 24
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
Expand Down
Loading
Loading