diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c8a4dc6e67908..80db53f02ba83 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3874,6 +3874,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero.setBitsFrom(LowBits);
     break;
   }
+  case ISD::CTLS: {
+    // CTLS counts the redundant sign bits, i.e. one less than the number of
+    // sign bits, so the result is in the range [NumSignBits - 1, BitWidth - 1].
+    unsigned MinRedundantSignBits =
+        ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1;
+    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
+                        APInt(BitWidth, BitWidth));
+    Known = Range.toKnownBits();
+    break;
+  }
   case ISD::CTPOP: {
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     // If we know some of the bits are zero, they can't be one.
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index e050d0557dba1..e0cf26356da94 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -24,3 +24,118 @@ define i32 @cls64(i64 %t) {
 
 declare i32 @llvm.aarch64.cls(i32) nounwind
 declare i32 @llvm.aarch64.cls64(i64) nounwind
+
+define i8 @cls_i8(i8 %x) {
+; CHECK-LABEL: cls_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    cls w8, w8
+; CHECK-NEXT:    sub w0, w8, #24
+; CHECK-NEXT:    ret
+
+  %a = ashr i8 %x, 7
+  %b = xor i8 %x, %a
+  %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false)
+  %d = sub i8 %c, 1
+  ret i8 %d
+}
+
+; The result is in the range [1-31], so we don't need an 'and' after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
+  %a = ashr i32 %x, 31
+  %b = xor i32 %x, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = and i32 %d, 31
+  ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
+  %sext = sext i16 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
+
+; Check that the upper bound of the range used for the CTLS known bits
+; is not set to 32
+define i64 @cls_i64_not_32(i64 %x) {
+; CHECK-LABEL: cls_i64_not_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr x8, x0, #16
+; CHECK-NEXT:    cls x8, x8
+; CHECK-NEXT:    orr x0, x8, #0x10
+; CHECK-NEXT:    ret
+  %val = ashr i64 %x, 16
+  %a = ashr i64 %val, 63
+  %b = xor i64 %val, %a
+  %c = shl i64 %b, 1
+  %d = or i64 %c, 1
+  %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
+  %f = or i64 %e, 16
+  ret i64 %f
+}
+
+; There are at least 24 redundant sign bits so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
+  %sext = sext i8 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 24
+  ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the orr.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sbfx w8, w0, #0, #31
+; CHECK-NEXT:    cls w8, w8
+; CHECK-NEXT:    orr w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %shl = shl i32 %x, 1
+  %ashr = ashr i32 %shl, 1
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 1
+  ret i32 %e
+}
+
+; Negative test. Check that the number of sign bits is not
+; overestimated. If it is, the orr disappears.
+define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr w8, w0, #15
+; CHECK-NEXT:    cls w8, w8
+; CHECK-NEXT:    orr w0, w8, #0x10
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %x, 15
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 9f36f767c1ba7..67d76f69ab3dd 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -234,6 +234,88 @@ define i64 @cls_i64_2(i64 %x) {
   ret i64 %e
 }
 
+; The result is in the range [1-31], so we don't need an andi after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ret
+  %a = ashr i32 %x, 31
+  %b = xor i32 %x, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = and i32 %d, 31
+  ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ret
+  %sext = sext i16 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
+
+; There are at least 24 redundant sign bits so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ret
+  %sext = sext i8 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 24
+  ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the ori.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    srai a0, a0, 1
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ori a0, a0, 1
+; CHECK-NEXT:    ret
+  %shl = shl i32 %x, 1
+  %ashr = ashr i32 %shl, 1
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 1
+  ret i32 %e
+}
+
+; Negative test. Check that the number of sign bits is not
+; overestimated. If it is, the ori disappears.
+define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srai a0, a0, 15
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ori a0, a0, 16
+; CHECK-NEXT:    ret
+  %ashr = ashr i32 %x, 15
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
+
 define i64 @slx_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: slx_i64:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index 21779543ed011..2d6d615d9f7b9 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -311,6 +311,25 @@ define i64 @cls_i64_2(i64 %x) {
   ret i64 %e
 }
 
+; Check that the upper bound of the range used for the CTLS known bits
+; is not set to 32
+define i64 @cls_i64_not_32(i64 %x) {
+; CHECK-LABEL: cls_i64_not_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srai a0, a0, 16
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ori a0, a0, 16
+; CHECK-NEXT:    ret
+  %val = ashr i64 %x, 16
+  %a = ashr i64 %val, 63
+  %b = xor i64 %val, %a
+  %c = shl i64 %b, 1
+  %d = or i64 %c, 1
+  %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
+  %f = or i64 %e, 16
+  ret i64 %f
+}
+
 define i128 @slx_i128(i128 %x, i128 %y) {
 ; CHECK-LABEL: slx_i128:
 ; CHECK:       # %bb.0: