Skip to content

[DAG] computeKnownBits - add CTLS handling#174824

Merged
RKSimon merged 9 commits into llvm:main from
stomfaig:issue_174370
Jan 14, 2026
Merged

[DAG] computeKnownBits - add CTLS handling#174824
RKSimon merged 9 commits into llvm:main from
stomfaig:issue_174370

Conversation

@stomfaig
Copy link
Contributor

@stomfaig stomfaig commented Jan 7, 2026

Add handling for CTLS using the same method as in #174636.

Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving llvm.arm.cls to clz, so no tests were added there.

@stomfaig stomfaig changed the title [DAG][GISel] computeKnownBits - add CTLS handling [DAG] computeKnownBits - add CTLS handling Jan 7, 2026
@llvmbot
Copy link
Member

llvmbot commented Jan 7, 2026

@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-aarch64

Author: Gergo Stomfai (stomfaig)

Changes

Add handling for CTLS using the same method as in #174636.

Added tests to AArch64 and RISCV, but it seems that ARM is actually resolving llvm.arm.cls to clz, so no tests were added there.


Full diff: https://github.com/llvm/llvm-project/pull/174824.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+8)
  • (modified) llvm/test/CodeGen/AArch64/cls.ll (+78)
  • (modified) llvm/test/CodeGen/RISCV/rv32p.ll (+65)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 15f86cb94f958..ba130a78a1af7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3870,6 +3870,14 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero.setBitsFrom(LowBits);
     break;
   }
+  case ISD::CTLS: {
+    // CTLS is NumSignBits - 1, so the result is in [NumSignBits - 1, BitWidth).
+    unsigned MinRedundantSignBits =
+        ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1;
+    Known = ConstantRange(APInt(BitWidth, MinRedundantSignBits),
+                          APInt(BitWidth, BitWidth)).toKnownBits();
+    break;
+  }
   case ISD::CTPOP: {
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     // If we know some of the bits are zero, they can't be one.
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index f17ccf7d6f682..1eb6f7411a49d 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -18,3 +18,81 @@ define i32 @cls64(i64 %t) {
 
 declare i32 @llvm.aarch64.cls(i32) nounwind
 declare i32 @llvm.aarch64.cls64(i64) nounwind
+
+define i8 @cls_i8(i8 %x) {
+; CHECK-LABEL: cls_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtb w8, w0 
+; CHECK-NEXT:    cls w8, w8 
+; CHECK-NEXT:    sub w0, w8, #24 
+; CHECK-NEXT:    ret
+; Sign-extend to i32, cls, then subtract the 24 sign bits the extension added.
+  %a = ashr i8 %x, 7
+  %b = xor i8 %x, %a
+  %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false)
+  %d = sub i8 %c, 1
+  ret i8 %d
+}
+
+; The result is in the range [0, 31], so we don't need an and after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls	w0, w0
+; CHECK-NEXT:    ret
+  %a = ashr i32 %x, 31
+  %b = xor i32 %x, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = and i32 %d, 31
+  ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
+  %sext = sext i16 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
+
+; There are at least 24 redundant sign bits so we don't need an orr after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls	w0, w0
+; CHECK-NEXT:    ret
+  %sext = sext i8 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 24
+  ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit, so bit 0
+; of the result is unknown and we can't remove the orr.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:   sbfx	w8, w0, #0, #31
+; CHECK-NEXT:	  cls	w8, w8
+; CHECK-NEXT:	  orr	w0, w8, #0x1
+; CHECK-NEXT:	  ret
+  %shl = shl i32 %x, 1
+  %ashr = ashr i32 %shl, 1
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 1
+  ret i32 %e
+ }
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index 9faac5df2a414..9611af1ee8c0e 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -234,6 +234,71 @@ define i64 @cls_i64_2(i64 %x) {
   ret i64 %e
 }
 
+; The result is in the range [0, 31], so we don't need an andi after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls	a0, a0
+; CHECK-NEXT:    ret
+  %a = ashr i32 %x, 31
+  %b = xor i32 %x, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = and i32 %d, 31
+  ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls a0, a0
+; CHECK-NEXT:    ret
+  %sext = sext i16 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 16
+  ret i32 %e
+}
+
+; There are at least 24 redundant sign bits so we don't need an ori after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cls	a0, a0
+; CHECK-NEXT:    ret
+  %sext = sext i8 %x to i32
+  %a = ashr i32 %sext, 31
+  %b = xor i32 %sext, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 24
+  ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit, so bit 0
+; of the result is unknown and we can't remove the ori.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: cls a0, a0 
+; CHECK-NEXT: ori a0, a0, 1 
+; CHECK-NEXT: ret 
+  %shl = shl i32 %x, 1
+  %ashr = ashr i32 %shl, 1
+  %a = ashr i32 %ashr, 31
+  %b = xor i32 %ashr, %a
+  %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+  %d = sub i32 %c, 1
+  %e = or i32 %d, 1
+  ret i32 %e
+ }
+ 
+
 define i64 @slx_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: slx_i64:
 ; CHECK:       # %bb.0:

@topperc topperc self-requested a review January 7, 2026 18:57
@github-actions
Copy link

github-actions bot commented Jan 7, 2026

🐧 Linux x64 Test Results

  • 188403 tests passed
  • 5000 tests skipped

✅ The build succeeded and all tests passed.

@topperc
Copy link
Collaborator

topperc commented Jan 7, 2026

Please add additional tests that would have caught the bugs.

@topperc topperc requested a review from RKSimon January 7, 2026 22:36
@github-actions
Copy link

github-actions bot commented Jan 8, 2026

🪟 Windows x64 Test Results

  • 129384 tests passed
  • 2862 tests skipped

✅ The build succeeded and all tests passed.

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Jan 14, 2026
RKSimon added a commit that referenced this pull request Jan 14, 2026
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon enabled auto-merge (squash) January 14, 2026 14:30
@RKSimon RKSimon merged commit 5f31b9c into llvm:main Jan 14, 2026
10 of 11 checks passed
Priyanshu3820 pushed a commit to Priyanshu3820/llvm-project that referenced this pull request Jan 18, 2026
Priyanshu3820 pushed a commit to Priyanshu3820/llvm-project that referenced this pull request Jan 18, 2026
Add handling for CTLS using the same method as in
llvm#174636.

Added tests to AArch64 and RISCV, but it seems that ARM is actually
resolving `llvm.arm.cls` to `clz`, so no tests were added there.
BStott6 pushed a commit to BStott6/llvm-project that referenced this pull request Jan 22, 2026
BStott6 pushed a commit to BStott6/llvm-project that referenced this pull request Jan 22, 2026
Add handling for CTLS using the same method as in
llvm#174636.

Added tests to AArch64 and RISCV, but it seems that ARM is actually
resolving `llvm.arm.cls` to `clz`, so no tests were added there.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants