From 9b6777ce7af047c8c680d1eb55595df86aafb7c5 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 18 Jul 2025 19:26:28 +0000 Subject: [PATCH 1/3] pre-commit tests --- llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index af3cb63082e78..ea32170643702 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -1957,6 +1957,37 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { ret <2 x float> %r } +define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2bf16( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2bf16_param_0]; +; CHECK-NEXT: st.b32 [%rd2], %rd1; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x bfloat> + store <2 x bfloat> %trunc, ptr %p + ret void +} + +define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2f16( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2f16_param_0]; +; CHECK-NEXT: st.b32 [%rd2], %rd1; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x half> + store <2 x half> %trunc, ptr %p + ret void +} + + attributes #0 = { nounwind } attributes #1 = { "unsafe-fp-math" = "true" } attributes #2 = { "denormal-fp-math"="preserve-sign" } From e0923957f0a644dbc81a6d70533b2bcad68aef2c Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 18 Jul 2025 19:27:57 +0000 Subject: [PATCH 2/3] [NVPTX] Prevent fptrunc of v2f32 from being folded into store --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 ++ llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7aa06f9079b09..4e7002feea215 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -731,6 +731,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::bf16, Expand); setTruncStoreAction(MVT::f64, MVT::bf16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand); // PTX does not support load / store predicate registers setOperationAction(ISD::LOAD, MVT::i1, Custom); diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index ea32170643702..f24428ebcfb8c 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -1960,12 +1960,14 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { ; CHECK-LABEL: test_trunc_to_v2bf16( ; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2bf16_param_0]; -; CHECK-NEXT: st.b32 [%rd2], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0]; +; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; ; CHECK-NEXT: ret; %trunc = fptrunc <2 x float> %a to <2 x bfloat> store <2 x bfloat> %trunc, ptr %p @@ -1975,12 +1977,14 @@ define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { ; CHECK-LABEL: test_trunc_to_v2f16( ; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2f16_param_0]; -; CHECK-NEXT: st.b32 [%rd2], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0]; +; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; ; CHECK-NEXT: ret; %trunc = fptrunc <2 x float> %a to <2 x half> store <2 x half> %trunc, ptr %p From d7ae03f8f3b583f17e28a97c9341c7302aa96d3f Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 18 Jul 2025 20:34:05 +0000 Subject: [PATCH 3/3] fixup tests --- llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll | 7 ++++--- llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 9 +++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index e2a914d8cfc36..ba5813c869236 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -359,11 +359,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b, define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x bfloat> ret <2 x bfloat> %r diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index d0e2c1817f696..c765e34f3153f 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -1433,11 +1433,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1; +; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %r