[DAG] computeKnownBits - add CTLS handling #174824
Merged
Conversation
Member
|
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-backend-aarch64 Author: Gergo Stomfai (stomfaig) Changes: Add handling for CTLS using the same method as in #174636. Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving `llvm.arm.cls` to `clz`, so no tests were added there. Full diff: https://github.com/llvm/llvm-project/pull/174824.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 15f86cb94f958..ba130a78a1af7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3870,6 +3870,14 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(LowBits);
break;
}
+ case ISD::CTLS: {
+ // CTLS returns the number of *redundant* leading sign bits, i.e. it does
+ // not count the sign bit itself, so its result lies in [0, BitWidth - 1].
+ // ComputeNumSignBits includes the sign bit in its count (it returns a value
+ // in [1, BitWidth]), so subtract one to obtain a valid lower bound.
+ unsigned MinRedundantSignBits =
+ ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1;
+ // ConstantRange's upper bound is exclusive. Use the actual scalar bit width
+ // rather than a hard-coded 32 so that non-i32 types are handled correctly.
+ ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
+ APInt(BitWidth, BitWidth));
+ Known = Range.toKnownBits();
+ break;
+ }
case ISD::CTPOP: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index f17ccf7d6f682..1eb6f7411a49d 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -18,3 +18,81 @@ define i32 @cls64(i64 %t) {
declare i32 @llvm.aarch64.cls(i32) nounwind
declare i32 @llvm.aarch64.cls64(i64) nounwind
+
+define i8 @cls_i8(i8 %x) {
+; CHECK-LABEL: cls_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cls w8, w8
+; CHECK-NEXT: sub w0, w8, #24
+; CHECK-NEXT: ret
+
+ %a = ashr i8 %x, 7
+ %b = xor i8 %x, %a
+ %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false)
+ %d = sub i8 %c, 1
+ ret i8 %d
+}
+
+; The result is in the range [0, 31], so we don't need an and after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %a = ashr i32 %x, 31
+ %b = xor i32 %x, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = and i32 %d, 31
+ ret i32 %e
+}
+
+; There are at least 16 redundant sign bits, so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %sext = sext i16 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+}
+
+; There are at least 24 redundant sign bits, so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %sext = sext i8 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 24
+ ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the orr.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sbfx w8, w0, #0, #31
+; CHECK-NEXT: cls w8, w8
+; CHECK-NEXT: orr w0, w8, #0x1
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 1
+ %ashr = ashr i32 %shl, 1
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 1
+ ret i32 %e
+ }
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 9faac5df2a414..9611af1ee8c0e 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -234,6 +234,71 @@ define i64 @cls_i64_2(i64 %x) {
ret i64 %e
}
+; The result is in the range [0, 31], so we don't need an andi after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %a = ashr i32 %x, 31
+ %b = xor i32 %x, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = and i32 %d, 31
+ ret i32 %e
+}
+
+; There are at least 16 redundant sign bits, so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %sext = sext i16 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+}
+
+; There are at least 24 redundant sign bits, so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %sext = sext i8 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 24
+ ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the ori.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ori a0, a0, 1
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 1
+ %ashr = ashr i32 %shl, 1
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 1
+ ret i32 %e
+ }
+
+
define i64 @slx_i64(i64 %x, i64 %y) {
; CHECK-LABEL: slx_i64:
; CHECK: # %bb.0:
|
topperc
reviewed
Jan 7, 2026
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
Collaborator
|
Please add additional tests that would have caught the bugs. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
topperc
reviewed
Jan 12, 2026
RKSimon
added a commit
to RKSimon/llvm-project
that referenced
this pull request
Jan 14, 2026
Noticed when reviewing llvm#174824
RKSimon
added a commit
that referenced
this pull request
Jan 14, 2026
Noticed when reviewing #174824
Priyanshu3820
pushed a commit
to Priyanshu3820/llvm-project
that referenced
this pull request
Jan 18, 2026
Noticed when reviewing llvm#174824
Priyanshu3820
pushed a commit
to Priyanshu3820/llvm-project
that referenced
this pull request
Jan 18, 2026
Add handling for CTLS using the same method as in llvm#174636. Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving `llvm.arm.cls` to `clz`, so no tests were added there.
BStott6
pushed a commit
to BStott6/llvm-project
that referenced
this pull request
Jan 22, 2026
Noticed when reviewing llvm#174824
BStott6
pushed a commit
to BStott6/llvm-project
that referenced
this pull request
Jan 22, 2026
Add handling for CTLS using the same method as in llvm#174636. Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving `llvm.arm.cls` to `clz`, so no tests were added there.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Add handling for CTLS using the same method as in #174636.
Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving
`llvm.arm.cls` to `clz`, so no tests were added there.