[SPIR-V] Implement selection for llvm.canonicalize(x) as fmul(x, 1.0) by Maetveis · Pull Request #178439 · llvm/llvm-project

Maetveis · 2026-01-28T14:51:55Z

No description provided.

llvmbot · 2026-01-28T14:53:37Z

@llvm/pr-subscribers-backend-spir-v

Author: Mészáros Gergely (Maetveis)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/178439.diff

3 Files Affected:

(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+32)
(modified) llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp (+3)
(modified) llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-intrinsics.ll (+45)

diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 915db6824d7c6..8bd3a7ce73ff2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -339,6 +339,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
                    MachineInstr &I) const;
   bool selectDerivativeInst(Register ResVReg, const SPIRVType *ResType,
                             MachineInstr &I, const unsigned DPdOpCode) const;
+  bool selectFCanonicalize(Register ResVReg, const SPIRVType *ResType,
+                           MachineInstr &I) const;
   // Utilities
   std::pair<Register, bool>
   buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -987,6 +989,9 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
   case TargetOpcode::G_FMAXIMUM:
     return selectExtInst(ResVReg, ResType, I, CL::fmax, GL::NMax);
 
+  case TargetOpcode::G_FCANONICALIZE:
+    return selectFCanonicalize(ResVReg, ResType, I);
+
   case TargetOpcode::G_FCOPYSIGN:
     return selectExtInst(ResVReg, ResType, I, CL::copysign);
 
@@ -3007,6 +3012,33 @@ SPIRVInstructionSelector::buildI32Constant(uint32_t Val, MachineInstr &I,
   return {NewReg, Result};
 }
 
+bool SPIRVInstructionSelector::selectFCanonicalize(Register ResVReg,
+                                                   const SPIRVType *ResType,
+                                                   MachineInstr &I) const {
+  // There is no native fcanonicalize instruction in SPIRV. We can lower it to:
+  // - fmin(x, x) or
+  // - fmul(x, 1.0)
+  //
+  // We use fmul(x, 1.0) here, because:
+  // - llvm-spirv translates fmin to a function call, whereas
+  //   fmul is translated to the LLVM fmul instruction.
+  // - fmin requires either OpenCL or GLSL extended instruction set, whereas
+  //   fmul does not.
+
+  // fcanonicalize(x) -> fmul(x, 1.0)
+  SPIRVType *SpirvScalarType = GR.getScalarOrVectorComponentType(ResType);
+  auto Opcode = ResType->getOpcode() == SPIRV::OpTypeVector
+                    ? SPIRV::OpVectorTimesScalar
+                    : SPIRV::OpFMulS;
+
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(1).getReg())
+      .addUse(buildOnesValF(SpirvScalarType, I))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
 bool SPIRVInstructionSelector::selectFCmp(Register ResVReg,
                                           const SPIRVType *ResType,
                                           MachineInstr &I) const {
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 03d846cb90b4c..fcfb2f33b88c3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -467,6 +467,9 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
                                G_INTRINSIC_ROUNDEVEN})
       .legalFor(allFloatScalarsAndVectors);
 
+  getActionDefinitionsBuilder(G_FCANONICALIZE)
+      .legalFor(allFloatScalarsAndVectors);
+
   getActionDefinitionsBuilder(G_FCOPYSIGN)
       .legalForCartesianProduct(allFloatScalarsAndVectors,
                                 allFloatScalarsAndVectors);
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-intrinsics.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-intrinsics.ll
index 70030ca1a0316..41a69f3dfd6b3 100644
--- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fp-intrinsics.ll
@@ -8,6 +8,11 @@
 ; CHECK: %[[#var2:]] = OpTypeFloat 64
 ; CHECK: %[[#var3:]] = OpTypeVector %[[#var1]] 4
 
+; 15360 = 0x3c00 = 1.0 (f16, IEEE half precision)
+; CHECK: %[[#one_f16:]] = OpConstant %[[#var0]] 15360
+; CHECK: %[[#one_f32:]] = OpConstant %[[#var1]] 1
+; CHECK: %[[#one_f64:]] = OpConstant %[[#var2]] 1
+
 ; CHECK: OpFunction
 ; CHECK: %[[#]] = OpExtInst %[[#var0]] %[[#extinst_id]] fabs
 ; CHECK: OpFunctionEnd
@@ -403,3 +408,43 @@ return:
 }
 
 declare { double, double } @llvm.modf.f64(double)
+
+; CHECK: OpFunction
+; CHECK: %[[#x:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpFMul %[[#var0]] %[[#x]] %[[#one_f16]]
+; CHECK: OpFunctionEnd
+define dso_local half @TestCanonicalizeF16(half %x) {
+entry:
+  %t = tail call half @llvm.canonicalize.f16(half %x)
+  ret half %t
+}
+
+; CHECK: OpFunction
+; CHECK: %[[#x:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpFMul %[[#var1]] %[[#x]] %[[#one_f32]]
+; CHECK: OpFunctionEnd
+define dso_local float @TestCanonicalizeF32(float %x) {
+entry:
+  %t = tail call float @llvm.canonicalize.f32(float %x)
+  ret float %t
+}
+
+; CHECK: OpFunction
+; CHECK: %[[#x:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpFMul %[[#var2]] %[[#x]] %[[#one_f64]]
+; CHECK: OpFunctionEnd
+define dso_local double @TestCanonicalizeF64(double %x) {
+entry:
+  %t = tail call double @llvm.canonicalize.f64(double %x)
+  ret double %t
+}
+
+; CHECK: OpFunction
+; CHECK: %[[#x:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpVectorTimesScalar %[[#var3]] %[[#x]] %[[#one_f32]]
+; CHECK: OpFunctionEnd
+define dso_local <4 x float> @TestCanonicalizeVec(<4 x float> %x) {
+entry:
+  %t = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
+  ret <4 x float> %t
+}
\ No newline at end of file

arsenm · 2026-01-28T15:16:02Z

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

+bool SPIRVInstructionSelector::selectFCanonicalize(Register ResVReg,
+                                                   const SPIRVType *ResType,
+                                                   MachineInstr &I) const {
+  // There is no native fcanonicalize instruction in SPIRV. We can lower it to:


Suggested change

// There is no native fcanonicalize instruction in SPIRV. We can lower it to:

// We can lower it to:

There isn't one anywhere; it's a synthetic compiler operation. This could be almost any FP instruction.

arsenm · 2026-01-28T15:16:29Z

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

+                                                   const SPIRVType *ResType,
+                                                   MachineInstr &I) const {
+  // There is no native fcanonicalize instruction in SPIRV. We can lower it to:
+  // - fmin(x, x) or


I wouldn't bother mentioning the fmin case that you aren't using

arsenm · 2026-01-28T15:17:32Z

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

+  //
+  // We use fmul(x, 1.0) here, because:
+  // - llvm-spirv translates fmin to a function call, whereas
+  //   fmul is translated to the LLVM fmul instruction.


This is kind of problematic. If the net result is llvm.canonicalize -> spirv fmul -> llvm fmul, you're losing the semantics in the reloaded program

What do you mean exactly? The semantics of the original program should be preserved, I think.

This is the output of the translator when run on the SPIR-V module produced by the tests added here

```llvm
; Function Attrs: nounwind
define spir_func half @TestCanonicalizeF16(half %x) #0 {
entry:
  %0 = fmul half %x, 0xH3C00
  ret half %0
}

; Function Attrs: nounwind
define spir_func float @TestCanonicalizeF32(float %x) #0 {
entry:
  %0 = fmul float %x, 1.000000e+00
  ret float %0
}

; Function Attrs: nounwind
define spir_func double @TestCanonicalizeF64(double %x) #0 {
entry:
  %0 = fmul double %x, 1.000000e+00
  ret double %0
}

; Function Attrs: nounwind
define spir_func <4 x float> @TestCanonicalizeVec(<4 x float> %x) #0 {
entry:
  %scale = fmul <4 x float> %x, splat (float 1.000000e+00)
  ret <4 x float> %scale
}
```

I don't see a functional problem here, or do you mean the loss of information in general? Outside of adding a new SPIR-V instruction, or special-casing OpFMul(x, 1.0) -> @llvm.canonicalize(x) in the bidirectional translator I don't think that's avoidable.

Canonicalization should generally perform the following, if I understand it correctly:

- canonicalize(sNaN) -> qNaN
- canonicalize(subnormal) -> ±0 if DTZ is enabled.

Both should be done by fmul too.

IIUC middle-end optimizations can fold away the fmul in the reverse-translated LLVM IR.

> IIUC middle-end optimizations can fold away the fmul in the reverse-translated LLVM IR.

Okay, yeah I can confirm that happening. Running opt -O3 on the above IR produces:

```llvm
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define spir_func half @TestCanonicalizeF16(half returned %x) local_unnamed_addr #2 {
entry:
  ret half %x
}
```

I do wonder, though: doesn't this fold contradict the following guarantee in LangRef?

> In particular, such a floating-point instruction returning a non-NaN value is guaranteed to always return the same bit-identical result on all machines and optimization levels.

Alive seems to agree: https://alive2.llvm.org/ce/z/abPSeW

@nikic, is this a bug I should report? The offending code is

llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp

Lines 6009 to 6011 in 12c13e0

// X * 1.0 --> X

if (match(Op1, m_FPOne()))

return Op0;

I guess #174293 would simplify fixing it.

Ignoring denormals for a moment, there might still be a problem with fmul being allowed to return an sNaN unmodified or to alter NaN payloads. I say "might" because the SPIR-V spec is quite vague about NaNs in general. I think I'll raise this issue for discussion on the SPIR-V translator repo.

@Maetveis It's not a bug, see https://llvm.org/docs/LangRef.html#behavior-of-floating-point-nan-values. Not quieting sNaN is explicitly allowed for non-constrained FP.

For your purposes, if SPIRV cares about this and doesn't have an explicit canonicalize instruction, you should probably translate fmul x, 1.0 to the canonicalize intrinsic when raising SPIRV to LLVM.

> Not quieting sNaN is explicitly allowed for non-constrained FP.

I understand that, the question was about denormals. fmul float %x, 1.0 should result in 0 with "denormal-fp-math"="preserve-sign", but the optimization changes it to return the denormal value unmodified.

Ah, okay. I don't think we really specify how this specific interaction of non-IEEE (denormal) fpenv with non-constrained FP works, but my general assumption was that omission of canonicalizing operations still holds in that mode. That's something we might want to change though. @arsenm Thoughts? (This probably needs an RFC.)

> I don't think we really specify how this specific interaction of non-IEEE (denormal) fpenv with non-constrained FP works, but my general assumption was that omission of canonicalizing operations still holds in that mode.

I think this sentence from the description of "denormal-fp-math" disagrees:

> If the input mode is "preserve-sign", or "positive-zero", a floating-point operation must treat any input denormal value as zero. In some situations, if an instruction does not respect this mode, the input may need to be converted to 0 as if by @llvm.canonicalize during lowering for correctness.

This is specified. Flushing denormals is never a guarantee. denormal-fp-math is not prescriptive of the behavior of operations; it is an assertion of a hazardous FP environment. That is, it is a warning that "fmul", when executed on the machine, will not behave properly, not a requirement that the IR flush the input/output. llvm.canonicalize is the one place where you can guarantee observing the environment's denormal effect.

I have been questioning whether we should keep maintaining this system of permitting canonicalize dropping. However, that still would not imply mandating that fmul flush under a flushing environment. At minimum, I think we need to stop allowing canonicalize dropping in codegen.

[SPIR-V] Implement selection for llvm.canonicalize(x) as fmul(x, 1.0)

c9052f0

Maetveis requested review from arsenm and michalpaszkowski January 28, 2026 14:51

Maetveis mentioned this pull request Jan 28, 2026

[libclc] Refine __clc_fp*_subnormals_supported #157633

Open

llvmbot added the backend:SPIR-V label Jan 28, 2026

fix no newline at the end of file

e4a0dd7

arsenm added the floating-point Floating-point math label Jan 28, 2026

arsenm reviewed Jan 28, 2026

View reviewed changes

Maetveis marked this pull request as draft January 29, 2026 11:22

jmmartinez mentioned this pull request Jan 30, 2026

[SPIRV] Emit intrinsics for globals only in function that references them #178143

Merged

Maetveis mentioned this pull request Feb 2, 2026

[Question / Feature Request] Translation of floating point canonicalize (@llvm.canonicalize) KhronosGroup/SPIRV-LLVM-Translator#3559

Open

	// There is no native fcanonicalize instruction in SPIRV. We can lower it to:
	// We can lower it to:

Conversation

Maetveis commented Jan 28, 2026

Uh oh!

llvmbot commented Jan 28, 2026

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Maetveis Jan 29, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Maetveis Jan 29, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

Maetveis Jan 29, 2026 •

edited

Loading

Maetveis Jan 29, 2026 •

edited

Loading