-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAGCombiner] Fold [us]itofp of truncate #149391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Fold [us]itofp of truncate #149391
Conversation
|
@llvm/pr-subscribers-backend-nvptx @llvm/pr-subscribers-llvm-selectiondag Author: Alex MacLean (AlexMaclean) Changes: Full diff: https://github.com/llvm/llvm-project/pull/149391.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 40464e91f9efc..39b46240e6c71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18727,6 +18727,13 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
return FTrunc;
+
+ // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
+ if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
+ TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
+ N0.getOperand(0).getValueType()))
+ return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
+
return SDValue();
}
@@ -18764,6 +18771,13 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
return FTrunc;
+
+ // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
+ if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
+ TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
+ N0.getOperand(0).getValueType()))
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index cbc9f700b1f01..aba20e6b0f27f 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -2311,4 +2311,51 @@ entry:
ret void
}
+define <4 x float> @test_uitofp_v4i8(<4 x i8> %a) {
+; CHECK-LABEL: test_uitofp_v4i8(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<10>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_v4i8_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U;
+; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2;
+; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
+; CHECK-NEXT: cvt.rn.f32.u32 %r5, %r4;
+; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
+; CHECK-NEXT: cvt.rn.f32.u32 %r7, %r6;
+; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x7770U;
+; CHECK-NEXT: cvt.rn.f32.u32 %r9, %r8;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
+; CHECK-NEXT: ret;
+ %r = uitofp <4 x i8> %a to <4 x float>
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_sitofp_v4i8(<4 x i8> %a) {
+; CHECK-LABEL: test_sitofp_v4i8(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<10>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_sitofp_v4i8_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0xbbb3U;
+; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
+; CHECK-NEXT: cvt.rn.f32.s16 %r3, %rs1;
+; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U;
+; CHECK-NEXT: cvt.u16.u32 %rs2, %r4;
+; CHECK-NEXT: cvt.rn.f32.s16 %r5, %rs2;
+; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
+; CHECK-NEXT: cvt.u16.u32 %rs3, %r6;
+; CHECK-NEXT: cvt.rn.f32.s16 %r7, %rs3;
+; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x8880U;
+; CHECK-NEXT: cvt.u16.u32 %rs4, %r8;
+; CHECK-NEXT: cvt.rn.f32.s16 %r9, %rs4;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
+; CHECK-NEXT: ret;
+ %r = sitofp <4 x i8> %a to <4 x float>
+ ret <4 x float> %r
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/trunc-tofp.ll b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
new file mode 100644
index 0000000000000..404c423cc026a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_80 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_80 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
+; CHECK-LABEL: uitofp_trunc_nuw(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nuw_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nuw_param_1];
+; CHECK-NEXT: add.s32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %v = add i32 %x, %y
+ %t = trunc nuw i32 %v to i16
+ %f = uitofp i16 %t to float
+ ret float %f
+}
+
+define float @sitofp_trunc_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: sitofp_trunc_nsw(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nsw_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nsw_param_1];
+; CHECK-NEXT: add.s32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %v = add i32 %x, %y
+ %t = trunc nsw i32 %v to i16
+ %f = sitofp i16 %t to float
+ ret float %f
+}
+
+define float @uitofp_trunc_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: uitofp_trunc_nsw(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nsw_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nsw_param_1];
+; CHECK-NEXT: add.s32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
+; CHECK-NEXT: cvt.rn.f32.u16 %r4, %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %v = add i32 %x, %y
+ %t = trunc nsw i32 %v to i16
+ %f = uitofp i16 %t to float
+ ret float %f
+}
+
+define float @sitofp_trunc_nuw(i32 %x, i32 %y) {
+; CHECK-LABEL: sitofp_trunc_nuw(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nuw_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nuw_param_1];
+; CHECK-NEXT: add.s32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
+; CHECK-NEXT: cvt.rn.f32.s16 %r4, %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %v = add i32 %x, %y
+ %t = trunc nuw i32 %v to i16
+ %f = sitofp i16 %t to float
+ ret float %f
+}
|
84c8753 to
4639393
Compare
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/21409 Here is the relevant piece of the build log for reference: |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/21552 Here is the relevant piece of the build log for reference: |
No description provided.