Skip to content

Commit 4639393

Browse files
committed
[DAGCombiner] Fold [us]itofp of truncate
1 parent 0002c17 commit 4639393

File tree

3 files changed

+18
-15
lines changed

3 files changed

+18
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18727,6 +18727,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18727 18727
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18728 18728
return FTrunc;
18729 18729

18730+
// fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18731+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18732+
TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
18733+
N0.getOperand(0).getValueType()))
18734+
return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18735+
18730 18736
return SDValue();
18731 18737
}
18732 18738

@@ -18764,6 +18770,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18764 18770
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18765 18771
return FTrunc;
18766 18772

18773+
// fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18774+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18775+
TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
18776+
N0.getOperand(0).getValueType()))
18777+
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18778+
18767 18779
return SDValue();
18768 18780
}
18769 18781

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2314,23 +2314,18 @@ entry:
2314 2314
define <4 x float> @test_uitofp_v4i8(<4 x i8> %a) {
2315 2315
; CHECK-LABEL: test_uitofp_v4i8(
2316 2316
; CHECK: {
2317-
; CHECK-NEXT: .reg .b16 %rs<5>;
2318 2317
; CHECK-NEXT: .reg .b32 %r<10>;
2319 2318
; CHECK-EMPTY:
2320 2319
; CHECK-NEXT: // %bb.0:
2321 2320
; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_v4i8_param_0];
2322 2321
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U;
2323-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
2324-
; CHECK-NEXT: cvt.rn.f32.u16 %r3, %rs1;
2322+
; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2;
2325 2323
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
2326-
; CHECK-NEXT: cvt.u16.u32 %rs2, %r4;
2327-
; CHECK-NEXT: cvt.rn.f32.u16 %r5, %rs2;
2324+
; CHECK-NEXT: cvt.rn.f32.u32 %r5, %r4;
2328 2325
; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
2329-
; CHECK-NEXT: cvt.u16.u32 %rs3, %r6;
2330-
; CHECK-NEXT: cvt.rn.f32.u16 %r7, %rs3;
2326+
; CHECK-NEXT: cvt.rn.f32.u32 %r7, %r6;
2331 2327
; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x7770U;
2332-
; CHECK-NEXT: cvt.u16.u32 %rs4, %r8;
2333-
; CHECK-NEXT: cvt.rn.f32.u16 %r9, %rs4;
2328+
; CHECK-NEXT: cvt.rn.f32.u32 %r9, %r8;
2334 2329
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
2335 2330
; CHECK-NEXT: ret;
2336 2331
%r = uitofp <4 x i8> %a to <4 x float>

llvm/test/CodeGen/NVPTX/trunc-tofp.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,13 @@ target triple = "nvptx64-nvidia-cuda"
7 7
define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
8 8
; CHECK-LABEL: uitofp_trunc_nuw(
9 9
; CHECK: {
10-
; CHECK-NEXT: .reg .b16 %rs<2>;
11 10
; CHECK-NEXT: .reg .b32 %r<5>;
12 11
; CHECK-EMPTY:
13 12
; CHECK-NEXT: // %bb.0:
14 13
; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nuw_param_0];
15 14
; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nuw_param_1];
16 15
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
17-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
18-
; CHECK-NEXT: cvt.rn.f32.u16 %r4, %rs1;
16+
; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r3;
19 17
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
20 18
; CHECK-NEXT: ret;
21 19
%v = add i32 %x, %y
@@ -27,15 +25,13 @@ define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
27 25
define float @sitofp_trunc_nsw(i32 %x, i32 %y) {
28 26
; CHECK-LABEL: sitofp_trunc_nsw(
29 27
; CHECK: {
30-
; CHECK-NEXT: .reg .b16 %rs<2>;
31 28
; CHECK-NEXT: .reg .b32 %r<5>;
32 29
; CHECK-EMPTY:
33 30
; CHECK-NEXT: // %bb.0:
34 31
; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nsw_param_0];
35 32
; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nsw_param_1];
36 33
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
37-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
38-
; CHECK-NEXT: cvt.rn.f32.s16 %r4, %rs1;
34+
; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r3;
39 35
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
40 36
; CHECK-NEXT: ret;
41 37
%v = add i32 %x, %y

0 commit comments

Comments
 (0)