From 3942a20f3e6937928526b6661c3a064d588f40a8 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 26 Jan 2023 10:25:44 -0800 Subject: [PATCH 1/7] Add new intrinsics and attributes to control accuracy of FP calls This patch adds a new set of fpbuiltin intrinsics to represent operations that are equivalent to common math library functions and basic operations, and adds a new call site attribute ("fp-max-error") to specify the required accuracy of these calls. The purpose of these new IR constructs is to support alternate math library implementations and provide a general mechanism for selecting among multiple implementations based on specific requirements. --- llvm/docs/LangRef.rst | 1183 +++++++++++++++++ .../include/llvm/Analysis/AltMathLibFuncs.def | 82 ++ .../include/llvm/Analysis/TargetLibraryInfo.h | 43 + .../include/llvm/CodeGen/CodeGenPassBuilder.h | 2 + .../llvm/CodeGen/FPBuiltinFnSelection.h | 29 + .../llvm/CodeGen/MachinePassRegistry.def | 1 + llvm/include/llvm/CodeGen/Passes.h | 4 + llvm/include/llvm/IR/FPBuiltinOps.def | 59 + llvm/include/llvm/IR/IntrinsicInst.h | 26 + llvm/include/llvm/IR/Intrinsics.td | 102 ++ llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/Analysis/TargetLibraryInfo.cpp | 92 ++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/FPBuiltinFnSelection.cpp | 181 +++ llvm/lib/CodeGen/TargetPassConfig.cpp | 1 + llvm/lib/IR/IntrinsicInst.cpp | 69 + llvm/test/CodeGen/AArch64/O0-pipeline.ll | 1 + llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 5 + llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 + .../Generic/fp-builtin-intrinsics-nomatch.ll | 16 + .../fp-builtin-intrinsics-unhandled.ll | 16 + .../CodeGen/Generic/fp-builtin-intrinsics.ll | 186 +++ llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + llvm/test/CodeGen/X86/O0-pipeline.ll | 1 + llvm/test/CodeGen/X86/opt-pipeline.ll | 1 
+ llvm/tools/opt/opt.cpp | 37 +- 30 files changed, 2127 insertions(+), 18 deletions(-) create mode 100644 llvm/include/llvm/Analysis/AltMathLibFuncs.def create mode 100644 llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h create mode 100644 llvm/include/llvm/IR/FPBuiltinOps.def create mode 100644 llvm/lib/CodeGen/FPBuiltinFnSelection.cpp create mode 100644 llvm/test/CodeGen/Generic/fp-builtin-intrinsics-nomatch.ll create mode 100644 llvm/test/CodeGen/Generic/fp-builtin-intrinsics-unhandled.ll create mode 100644 llvm/test/CodeGen/Generic/fp-builtin-intrinsics.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1b322d30e7100..ae8b2f5b0e42e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -22923,6 +22923,1189 @@ Returns another pointer that aliases its argument but which has no associated ``invariant.group`` metadata. It does not read any memory and can be speculated. +.. _fpbuiltin: + +Floating-Point Builtin Intrinsics +------------------------------------- + +These intrinsics are used to represent common floating-point operations with +the explicit expectation that the semantics of the operation may be modified +by call-site attributes that are specific to these intrinsics. Although many +of these operations correspond directly to functions defined by the standard +C math library, these intrinsics are intended to allow replacement of the +intrinsic with implementation outside the standard library, such as vector +implementations of the operation or alternate implementations to satisfy +different accuracy requirements. + +The following call-site attributes are currently recognized as being associated +with the floating-point builtin intrinsics: + +``"fpbuiltin-max-error"=""`` + This attribute specifies the required accuracy for the operation in ULPs. + The accuracy value must be a non-negative floating-point number. A value + of 0.5 or less indicates that the result is required to be correctly + rounded according to IEEE-754 rules. 
The default rounding mode + (round-to-nearest) may be assumed. + + If this attribute is absent, basic operations (fadd, fsub, fmul, fdiv, + frem, and sqrt) are assumed to provide a correctly rounded result. The + accuracy of other operations is target-dependent, corresponding to the + accuracy of the target-default implementation of the operation (usually + the implementation provided by the standard math library). If this + attribute is present, the intrinsic may only be replaced with + implementations which are known to provide at least the accuracy described. + An implementation which is more accurate than required by this attribute + may be used. + +The semantics of the fpbuiltin intrinsics may be further constrained by defining +new callsite attributes beginning with "fpbuiltin-". All such string attribute +identifiers are considered reserved for use with fpbuiltin intrinsics. + +No transformation should be performed on any fpbuiltin intrinsic if the +intrinsic has any callsite attributes beginning with "fpbuiltin-" that the code +performing the transformation does not recognize. + +Unless otherwise specified using callsite attributes, the fpbuiltin intrinsics +do not set ``errno`` and may be assumed not to trap or raise floating-point +exceptions. + +All fpbuiltin intrinsics are overloaded intrinsics which may operate on any +scalar or vector floating-point type. Not all targets support all types. + +'``llvm.fpbuiltin.fadd``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.fadd(<type> <op1>, <type> <op2>) + +Overview: +""""""""" + +The '``llvm.fpbuiltin.fadd``' intrinsic returns the sum of its two operands. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.fadd``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. Both arguments must have identical types.
+ +Semantics: +"""""""""" + +The value produced is the floating-point sum of the two value operands and has +the same type as the operands. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to be correctly rounded. + + +'``llvm.fpbuiltin.fsub``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.fsub( , ) + +Overview: +""""""""" + +The '``llvm.fpbuiltin.fsub``' intrinsic returns the difference of its two +operands. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.fsub``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point difference of the two value operands +and has the same type as the operands. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to be correctly +rounded. + + +'``llvm.fpbuiltin.fmul``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.fmul( , ) + +Overview: +""""""""" + +The '``llvm.fpbuiltin.fmul``' intrinsic returns the product of its two operands. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.fmul``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point product of the two value operands and +has the same type as the operands. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to be correctly rounded. + + +'``llvm.fpbuiltin.fdiv``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.fdiv( , ) + +Overview: +""""""""" + +The '``llvm.fpbuiltin.fdiv``' intrinsic returns the quotient of its two +operands. 
+ +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.fdiv``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point quotient of the two value operands and +has the same type as the operands. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to be correctly rounded. + + +'``llvm.fpbuiltin.frem``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.frem( , ) + +Overview: +""""""""" + +The '``llvm.fpbuiltin.frem``' intrinsic returns the remainder from the division +of its two operands. + + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.frem``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point remainder from the division of the two +value operands and has the same type as the operands. The remainder has the +same sign as the dividend. Unless modified by the "fpbuiltin-max-error" callsite +attribute, the result is assumed to be correctly rounded. + + +'``llvm.fpbuilt.sin``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.sin( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.sin``' intrinsics return the sine of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.sin``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point sine of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the sine operation for the input type. 
+ + +'``llvm.fpbuiltin.cos``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.cos(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.cos``' intrinsics return the cosine of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.cos``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point cosine of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the cosine operation for the input type. + + +'``llvm.fpbuiltin.tan``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.tan(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.tan``' intrinsics return the tangent of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.tan``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point tangent of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the tangent operation for the input type. + + +'``llvm.fpbuiltin.sinh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.sinh(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.sinh``' intrinsics return the hyperbolic sine of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.sinh``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point hyperbolic sine of the operand +and has the same type as the operand.
Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the hyperbolic sine +operation for the input type. + + +'``llvm.fpbuilt.cosh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.cosh( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.cosh``' intrinsics return the hyperbolic cosine of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.cosh``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point hyperbolic cosine of the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the hyperbolic cosine +operation for the input type. + + +'``llvm.fpbuilt.tanh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.tanh( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.tanh``' intrinsics return the hyperbolic tangent of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.tanh``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point hyperbolic tangent of the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the hyperbolic tangent +operation for the input type. + + +'``llvm.fpbuilt.asin``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.asin( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.asin``' intrinsics return the principal value of the +arc sine of the operand. 
+ +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.asin``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the principal value of the floating-point arc sine of +the operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the arc sine operation +for the input type. + + +'``llvm.fpbuilt.acos``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.acos( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.acos``' intrinsics return the principal value of the +arc cosine of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.acos``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the principal value of the floating-point arc cosine +of the operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the arc cosine operation +for the input type. + + +'``llvm.fpbuilt.atan``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.atan( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.atan``' intrinsics return the principal value of the +arc tangent of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.atan``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the principal value of the floating-point arc tangent +of the operand and has the same type as the operand. 
Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the arc tangent operation +for the input type. + + +'``llvm.fpbuiltin.atan2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.atan2(<type> <op1>, <type> <op2>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.atan2``' intrinsics return the principal value of the +arc tangent of op1/op2, expressed in radians. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.atan2``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the principal value of the floating-point arc tangent +of the quotient of the operands, expressed in radians, and has the same type +as the operands. Unless modified by the "fpbuiltin-max-error" callsite +attribute, the result is assumed to have the accuracy of the target-default +implementation of the atan2 operation for the input type. + + +'``llvm.fpbuiltin.asinh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.asinh(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.asinh``' intrinsics return the area hyperbolic sine of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.asinh``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point area hyperbolic sine of the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the area hyperbolic sine +operation for the input type.
+ + +'``llvm.fpbuiltin.acosh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.acosh(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.acosh``' intrinsics return the area hyperbolic cosine of +the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.acosh``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point area hyperbolic cosine of the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the area hyperbolic cosine +operation for the input type. + + +'``llvm.fpbuiltin.atanh``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.atanh(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.atanh``' intrinsics return the area hyperbolic tangent of +the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.atanh``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point area hyperbolic tangent of the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the area hyperbolic +tangent operation for the input type. + + +'``llvm.fpbuiltin.exp``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.exp(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.exp``' intrinsics return the base-e exponential function +of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.exp``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values.
+ +Semantics: +"""""""""" + +The value produced is the floating-point base-e exponential function of the +operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the exp operation for +the input type. + + +'``llvm.fpbuilt.exp2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.exp2( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.exp2``' intrinsics return the base-2 exponential function +of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.exp2``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point base-2 exponential function of the +operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the exp2 operation for +the input type. + + +'``llvm.fpbuilt.exp10``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.exp10( ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.exp10``' intrinsics return the base-10 exponential function +of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.exp10``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point base-10 exponential function of the +operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the exp10 operation +for the input type. 
+ + +'``llvm.fpbuiltin.expm1``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.expm1(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.expm1``' intrinsics return e raised to the power of the +operand, minus one. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.expm1``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point value of e raised to the power of the +operand, minus one, and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the expm1 operation +for the input type. + + +'``llvm.fpbuiltin.log``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.log(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.log``' intrinsics return the natural logarithm of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.log``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point natural logarithm of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the log operation for the input type. + + +'``llvm.fpbuiltin.log2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.log2(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.log2``' intrinsics return the base-2 logarithm of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.log2``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values.
+ +Semantics: +"""""""""" + +The value produced is the floating-point base-2 logarithm of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the log2 operation for the input type. + + +'``llvm.fpbuiltin.log10``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.log10(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.log10``' intrinsics return the base-10 logarithm of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.log10``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point base-10 logarithm of the operand and +has the same type as the operand. Unless modified by the "fpbuiltin-max-error" +callsite attribute, the result is assumed to have the accuracy of the +target-default implementation of the log10 operation for the input type. + + +'``llvm.fpbuiltin.log1p``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.log1p(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.log1p``' intrinsics return the natural logarithm of +one plus the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.log1p``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point natural logarithm of one plus +the operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the log1p operation +for the input type.
+ + +'``llvm.fpbuilt.hypot``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.hypot( , ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.hypot``' intrinsics return the hypotenuse of a +right triangle whose legs are op1 and op2. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.hypot``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point hypotenuse of a right triangle +whose legs are the operands and has the same type as the operands. Unless +modified by the "fpbuiltin-max-error" callsite attribute, the result is +assumed to have the accuracy of the target-default implementation of the +hypot operation for the input type. + + +'``llvm.fpbuilt.pow``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.pow( , ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.pow``' intrinsics return the value of op1 raised +to the power of op2. + +Arguments: +"""""""""" + +The arguments to the '``llvm.fpbuiltin.pow``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. Both arguments must have identical types. + +Semantics: +"""""""""" + +The value produced is the floating-point value of the first operand raised +to the power of the second operand and has the same type as the operands. +Unless modified by the "fpbuiltin-max-error" callsite attribute, the result is +assumed to have the accuracy of the target-default implementation of the pow +operation for the input type. + + +'``llvm.fpbuilt.ldexp``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.ldexp( , ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.ldexp``' intrinsics return the value of op1 multiplied by +by two raised to the power of op2. 
+ +Arguments: +"""""""""" + +The first argument to the '``llvm.fpbuiltin.ldexp``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. The second argument must be a 32-bit integer value +or a :ref:`vector <t_vector>` of 32-bit integers with the same number of +elements as the first operand. + +Semantics: +"""""""""" + +The value produced is the floating-point value of the first operand multiplied +by two raised to the power of the second operand and has the same type as the +first operand. Unless modified by the "fpbuiltin-max-error" callsite attribute, +the result is assumed to have the accuracy of the target-default implementation +of the ldexp operation for the input type. + + +'``llvm.fpbuiltin.sqrt``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.sqrt(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.sqrt``' intrinsics return the square root of the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.sqrt``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point square root of the operand and +has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to be +correctly rounded. + + +'``llvm.fpbuiltin.rsqrt``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.rsqrt(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.rsqrt``' intrinsics return the inverse square root of the +operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.rsqrt``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point inverse square root of the operand +and has the same type as the operand.
Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the rsqrt operation +for the input type. + + +'``llvm.fpbuiltin.erf``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.erf(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.erf``' intrinsics return the error function value for +the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.erf``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point error function value for the operand +and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the erf operation for +the input type. + + +'``llvm.fpbuiltin.erfc``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> @llvm.fpbuiltin.erfc(<type> <op>) + + +Overview: +""""""""" + +The '``llvm.fpbuiltin.erfc``' intrinsics return the complementary error function +value for the operand. + +Arguments: +"""""""""" + +The argument to the '``llvm.fpbuiltin.erfc``' intrinsic must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` of +floating-point values. + +Semantics: +"""""""""" + +The value produced is the floating-point complementary error function value +for the operand and has the same type as the operand. Unless modified by the +"fpbuiltin-max-error" callsite attribute, the result is assumed to have +the accuracy of the target-default implementation of the erfc operation for +the input type.
+ + +'``llvm.fpbuilt.sincos``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare @llvm.fpbuiltin.sincos( , ptr , ptr ) + + +Overview: +""""""""" + +The '``llvm.fpbuilt.sincos``' intrinsics compute the sine and cosine of the +first operand and returns the results via the pointers passed as the second +and third operands. + +Arguments: +"""""""""" + +The first argument to the '``llvm.fpbuiltin.sincos``' intrinsic must be +:ref:`floating-point ` or :ref:`vector ` of +floating-point values. The second and third arguments must be dereferenceable +pointers to memory which can hold a value of the first operand's type. + +Semantics: +"""""""""" + +The values produced are the floating-point sine and cosine of the first +operand and are stored using the same type as the first operand. Unless +modified by the "fpbuiltin-max-error" callsite attribute, the result is +assumed to have the accuracy of the target-default implementation of +the sincos operation for the input type. + .. _constrainedfp: diff --git a/llvm/include/llvm/Analysis/AltMathLibFuncs.def b/llvm/include/llvm/Analysis/AltMathLibFuncs.def new file mode 100644 index 0000000000000..7c37fc9736437 --- /dev/null +++ b/llvm/include/llvm/Analysis/AltMathLibFuncs.def @@ -0,0 +1,82 @@ +//===-- AltMathLibFuncs.def - Library information ---------*- C++ -*-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This .def file will create descriptions of available fpbuilt math library +// function implementations and their constraining attributes. The current +// support is limited to a fake test library for verifying the infrastructure. +// The fake implementation can be removed when a real implementation is +// available. 
+ +// An accuracy of 0.5 indicates that the result is exact or correctly rounded. + +#define FIXED(NL) ElementCount::getFixed(NL) +#define SCALABLE(NL) ElementCount::getScalable(NL) + +#if !(defined(TLI_DEFINE_ALTMATHFUNC)) +#define TLI_DEFINE_ALTMATHFUNC(IID, TYPE, VECSIZE, NAME, ACCURACY) \ + {IID, TYPE, VECSIZE, NAME, ACCURACY}, +#endif + + +#if defined(TLI_DEFINE_TEST_ALTMATHFUNCS) + +// Just define a few examples to test the infrastructure + +// TEST_ALTMATH_LIB Half precision implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::HalfTyID, FIXED(1), "__test_altmath_fdivh_med", 2.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::HalfTyID, FIXED(1), "__test_altmath_sinh_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_med", 4.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::HalfTyID, FIXED(1), "__test_altmath_sqrth_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::HalfTyID, FIXED(1), "__test_altmath_rsqrth_cr", 0.5) + +// TEST_ALTMATH_LIB Single precision implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::FloatTyID, FIXED(1), "__test_altmath_fdivf_med", 2.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_med", 4.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::FloatTyID, FIXED(1), "__test_altmath_tanf_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_cr", 0.5) 
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_med", 2.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_low", 4096.0) + +// TEST_ALTMATH_LIB Double precision implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::DoubleTyID, FIXED(1), "__test_altmath_fdiv_med", 2.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_med", 4.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::DoubleTyID, FIXED(1), "__test_altmath_tan_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_med", 2.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_cr", 0.5) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_low", 4096.0) + +// TEST_ALTMATH_LIB 4 x float implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(4), "__test_altmath_sinf4_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(4), "__test_altmath_cosf4_high", 
1.0) + +// TEST_ALTMATH_LIB 8 x float implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(8), "__test_altmath_sinf8_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(8), "__test_altmath_cosf8_high", 1.0) + +// TEST_ALTMATH_LIB 2 x double implementations +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(2), "__test_altmath_sin2_high", 1.0) +TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(2), "__test_altmath_cos2_high", 1.0) + + +#endif + + + +#undef TLI_DEFINE_ALTMATHFUNC +#undef TLI_DEFINE_TEST_ALTMATHFUNCS diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 87cead103f585..cf5e69474f562 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -12,6 +12,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include @@ -23,6 +24,15 @@ class Function; class Module; class Triple; +/// Describes a possible implementation of a floating point builtin operation +struct AltMathDesc { + Intrinsic::ID IntrinID; + Type::TypeID BaseFPType; + ElementCount VectorizationFactor; + StringRef FnImplName; + float Accuracy; +}; + /// Describes a possible vectorization of a function. /// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized /// by a factor 'VectorizationFactor'. @@ -68,6 +78,10 @@ class TargetLibraryInfoImpl { return static_cast((AvailableArray[F/4] >> 2*(F&3)) & 3); } + /// Alternate math library functions - sorted by intrinsic ID, then type, + /// then vector size, then accuracy + std::vector AltMathFuncDescs; + /// Vectorization descriptors - sorted by ScalarFnName. 
std::vector VectorDescs; /// Scalarization descriptors - same content as VectorDescs but sorted based @@ -96,6 +110,19 @@ class TargetLibraryInfoImpl { SVML // Intel short vector math library. }; + /// List of known alternate math libraries. + /// + /// The alternate math library provides a set of functions that can be used + /// to replace llvm.fpbuiltin intrinsic calls when one or more constraining + /// attributes are specified. + /// The library can be specified by either frontend or a commandline option, + /// and then used by addAltMathFunctionsFromLib for populating the tables of + /// math function implementations. + enum AltMathLibrary { + NoAltMathLibrary, // Don't use any alternate math library + TestAltMathLibrary // Use a fake alternate math library for testing + }; + TargetLibraryInfoImpl(); explicit TargetLibraryInfoImpl(const Triple &T); @@ -147,6 +174,19 @@ class TargetLibraryInfoImpl { /// This can be used for options like -fno-builtin. void disableAllFunctions(); + /// Add a set of alternate math library function implementations with + /// attributes that can be used to select an implementation for an + /// llvm.fpbuiltin intrinsic. + void addAltMathFunctions(ArrayRef Fns); + + /// Calls addAltMathFunctions with a known preset of functions for the + /// given alternate math library. + void addAltMathFunctionsFromLib(enum AltMathLibrary AltLib); + + /// Select an alternate math library implementation that meets the criteria + /// described by an FPBuiltinIntrinsic call. + StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const; + /// Add a set of scalar -> vector mappings, queryable via /// getVectorizedFunction and getScalarizedFunction. 
void addVectorizableFunctions(ArrayRef Fns); @@ -343,6 +383,9 @@ class TargetLibraryInfo { bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); } + StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const { + return Impl->selectFPBuiltinImplementation(Builtin); + } StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { return Impl->getVectorizedFunction(F, VF); } diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index 9ac51ed9f6fad..ebab73736f4ce 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/FPBuiltinFnSelection.h" #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" @@ -582,6 +583,7 @@ void CodeGenPassBuilder::addISelPasses(AddIRPass &addPass) const { addPass(PreISelIntrinsicLoweringPass()); derived().addIRPasses(addPass); + addPass(FPBuiltinFnSelectionPass()); derived().addCodeGenPrepare(addPass); addPassesToHandleExceptions(addPass); derived().addISelPrepare(addPass); diff --git a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h b/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h new file mode 100644 index 0000000000000..6fc04a63c15fe --- /dev/null +++ b/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h @@ -0,0 +1,29 @@ +//===- FPBuiltinFnSelection.h - Pre-ISel intrinsic lowering pass ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements alternate math library implementation selection for +// llvm.fpbuiltin.* intrinsics. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_FPBUILTINFNSELECTION_H +#define LLVM_CODEGEN_FPBUILTINFNSELECTION_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +struct FPBuiltinFnSelectionPass + : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_FPBUILTINFNSELECTION_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def index 8efd1d2e95e93..e325086b28206 100644 --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -39,6 +39,7 @@ FUNCTION_PASS("mergeicmps", MergeICmpsPass, ()) FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) +FUNCTION_PASS("fpbuiltin-fn-selection", FPBuiltinFnSelectionPass, ()) FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 9dcdbd55b4d52..94f5be382024d 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -450,6 +450,10 @@ namespace llvm { /// evaluation. ModulePass *createPreISelIntrinsicLoweringPass(); + /// This pass lowers the \@llvm.fpbuiltin.{operation} intrinsics to + /// matching library function calls based on call site attributes. 
+ FunctionPass *createFPBuiltinFnSelectionPass(); + /// GlobalMerge - This pass merges internal (by default) globals into structs /// to enable reuse of a base pointer by indexed addressing modes. /// It can also be configured to focus on size optimizations only. diff --git a/llvm/include/llvm/IR/FPBuiltinOps.def b/llvm/include/llvm/IR/FPBuiltinOps.def new file mode 100644 index 0000000000000..2d793377fa49b --- /dev/null +++ b/llvm/include/llvm/IR/FPBuiltinOps.def @@ -0,0 +1,59 @@ +//===--- llvm/IR/FPBuiltinOps.def - FP builtin intrinsics -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines properties of floating point builtin intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef OPERATION +#define OPERATION(N,I) +#endif + +// Arguments of the entries are: +// - operation name. +// - name of the fpbuiltin intrinsic to represent this operation. + +// These are the operations that are represented by floating point builtin +// intrinsics. 
+// +OPERATION(FAdd, fpbuiltin_fadd) +OPERATION(FSub, fpbuiltin_fsub) +OPERATION(FMul, fpbuiltin_fmul) +OPERATION(FDiv, fpbuiltin_fdiv) +OPERATION(FRem, fpbuiltin_frem) +OPERATION(Sin, fpbuiltin_sin) +OPERATION(Cos, fpbuiltin_cos) +OPERATION(Tan, fpbuiltin_tan) +OPERATION(Sinh, fpbuiltin_sinh) +OPERATION(Cosh, fpbuiltin_cosh) +OPERATION(Tanh, fpbuiltin_tanh) +OPERATION(Asin, fpbuiltin_asin) +OPERATION(Acos, fpbuiltin_acos) +OPERATION(Atan, fpbuiltin_atan) +OPERATION(Atan2, fpbuiltin_atan2) +OPERATION(Asinh, fpbuiltin_asinh) +OPERATION(Acosh, fpbuiltin_acosh) +OPERATION(Atanh, fpbuiltin_atanh) +OPERATION(Exp, fpbuiltin_exp) +OPERATION(Exp2, fpbuiltin_exp2) +OPERATION(Exp10, fpbuiltin_exp10) +OPERATION(Expm1, fpbuiltin_expm1) +OPERATION(Log, fpbuiltin_log) +OPERATION(Log2, fpbuiltin_log2) +OPERATION(Log10, fpbuiltin_log10) +OPERATION(Log1p, fpbuiltin_log1p) +OPERATION(Hypot, fpbuiltin_hypot) +OPERATION(Pow, fpbuiltin_pow) +OPERATION(Ldexp, fpbuiltin_ldexp) +OPERATION(Sqrt, fpbuiltin_sqrt) +OPERATION(Rsqrt, fpbuiltin_rsqrt) +OPERATION(Erf, fpbuiltin_erf) +OPERATION(Erfc, fpbuiltin_erfc) +OPERATION(Sincos, fpbuiltin_sincos) + +#undef OPERATION diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index e9dfd04d40db4..fad8e1f812bc9 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -23,6 +23,7 @@ #ifndef LLVM_IR_INTRINSICINST_H #define LLVM_IR_INTRINSICINST_H +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -591,6 +592,31 @@ class VPCmpIntrinsic : public VPIntrinsic { /// @} }; +/// This is the common base class for floating point builtin intrinsics. 
+class FPBuiltinIntrinsic : public IntrinsicInst { +public: + static const std::string FPBUILTIN_PREFIX; + static const std::string FP_MAX_ERROR; + + std::optional getRequiredAccuracy() const; + + Type::TypeID getBaseTypeID() const; + ElementCount getElementCount() const; + + /// Check the callsite attributes for this FPBuiltinIntrinsic against a list + /// of FP attributes that the caller knows how to process to see if the + /// current intrinsic has unrecognized attributes. + bool hasUnrecognizedFPAttrs(const StringSet<> HandledAttrs); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I); + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + /// @} +}; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 41dd958e068d7..f79c688d1d2a4 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -767,6 +767,108 @@ def int_is_fpclass [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg>]>; +//===----------------- Floating Point Builtin Intrinsics ------------------===// +// +// These intrinsics are intended as explicitly replaceable versions of common +// floating point math operations. Passes must check for call site attributes +// that constrain the behavior of these intrinsics before transforming them in +// any way. +// +// While many of these operations correspond to functions in the standard C +// math library, these intrinsics are explicitly intended to be replaceable by +// alternate implementations. 
+// + +let IntrProperties = [IntrNoMem, IntrWillReturn] in { + def int_fpbuiltin_fadd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_fsub : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_fmul : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_fdiv : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_frem : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + + def int_fpbuiltin_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_cos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_tan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_sinh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_cosh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_tanh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_asin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_acos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_atan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_atan2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_asinh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_acosh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_atanh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + + def int_fpbuiltin_exp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ 
LLVMMatchType<0> ]>; + def int_fpbuiltin_exp2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_exp10 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_expm1 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_log : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_log2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_log10 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_log1p : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + + def int_fpbuiltin_hypot : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0> ]>; + def int_fpbuiltin_ldexp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i32_ty> ]>; + + def int_fpbuiltin_sqrt : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_rsqrt : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + + def int_fpbuiltin_erf : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; + def int_fpbuiltin_erfc : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0> ]>; +} + +let IntrProperties = [IntrArgMemOnly, IntrWillReturn] in { + def int_fpbuiltin_sincos : DefaultAttrsIntrinsic<[], + [ llvm_anyfloat_ty, + llvm_ptr_ty, + llvm_ptr_ty ]>; +} + //===--------------- Constrained Floating Point Intrinsics ----------------===// // diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 29c93e391b613..1e073cebc5cfe 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -137,6 +137,7 @@ void 
initializeExpandVectorPredicationPass(PassRegistry &); void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeFEntryInserterPass(PassRegistry&); +void initializeFPBuiltinFnSelectionLegacyPassPass(PassRegistry&); void initializeFinalizeISelPass(PassRegistry&); void initializeFinalizeMachineBundlesPass(PassRegistry&); void initializeFixIrreduciblePass(PassRegistry &); diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 35811f7b04f17..8ea6794287df4 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -17,6 +17,15 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt ClAltMathLibrary( + "alt-math-library", cl::Hidden, + cl::desc("Alternate floating point math library"), + cl::init(TargetLibraryInfoImpl::NoAltMathLibrary), + cl::values(clEnumValN(TargetLibraryInfoImpl::NoAltMathLibrary, "none", + "No alternate math library"), + clEnumValN(TargetLibraryInfoImpl::TestAltMathLibrary, "test", + "Fake library used for testing"))); + static cl::opt ClVectorLibrary( "vector-library", cl::Hidden, cl::desc("Vector functions library"), cl::init(TargetLibraryInfoImpl::NoLibrary), @@ -868,6 +877,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); + TLI.addAltMathFunctionsFromLib(ClAltMathLibrary); } TargetLibraryInfoImpl::TargetLibraryInfoImpl() { @@ -893,6 +903,7 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; + AltMathFuncDescs = TLI.AltMathFuncDescs; } TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) @@ -906,6 +917,7 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) AvailableArray); 
VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; + AltMathFuncDescs = TLI.AltMathFuncDescs; } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { @@ -916,6 +928,9 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoI ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; SizeOfInt = TLI.SizeOfInt; memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; + AltMathFuncDescs = TLI.AltMathFuncDescs; return *this; } @@ -928,6 +943,9 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl && SizeOfInt = TLI.SizeOfInt; std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); + VectorDescs = TLI.VectorDescs; + ScalarDescs = TLI.ScalarDescs; + AltMathFuncDescs = TLI.AltMathFuncDescs; return *this; } @@ -1128,6 +1146,86 @@ void TargetLibraryInfoImpl::disableAllFunctions() { memset(AvailableArray, 0, sizeof(AvailableArray)); } +/// Orders alternate math descriptors by intrinsic ID, then base FP type, then +/// vector width (scalar first, scalable before fixed, then by minimum element +/// count), and finally by Accuracy value from largest to smallest. +static bool compareAltMathDescs(const AltMathDesc &LHS, + const AltMathDesc &RHS) { + if (LHS.IntrinID != RHS.IntrinID) + return LHS.IntrinID < RHS.IntrinID; + if (LHS.BaseFPType != RHS.BaseFPType) + return LHS.BaseFPType < RHS.BaseFPType; + if (LHS.VectorizationFactor != RHS.VectorizationFactor) { + // Sort scalar types ahead of vector types + if (LHS.VectorizationFactor.isScalar() != + RHS.VectorizationFactor.isScalar()) + return LHS.VectorizationFactor.isScalar() > + RHS.VectorizationFactor.isScalar(); + assert((LHS.VectorizationFactor.isVector() && + RHS.VectorizationFactor.isVector()) && + "Unexpected vectorization factor in alt math fn desc"); + // Sort scalable vector types ahead of fixed vector types + if (LHS.VectorizationFactor.isScalable() != + RHS.VectorizationFactor.isScalable()) + return LHS.VectorizationFactor.isScalable() > + RHS.VectorizationFactor + .isScalable(); + // For non-scalable vectors, this will be the fixed size 
+ // For scalable vectors, it's the size that's multiplied by the vscale + return LHS.VectorizationFactor.getKnownMinValue() < + RHS.VectorizationFactor.getKnownMinValue(); + } + // Sort the largest Accuracy value (the loosest error bound) first + return LHS.Accuracy > RHS.Accuracy; +} + +void TargetLibraryInfoImpl::addAltMathFunctions(ArrayRef Fns) { + llvm::append_range(AltMathFuncDescs, Fns); + llvm::sort(AltMathFuncDescs, compareAltMathDescs); +} + +void TargetLibraryInfoImpl::addAltMathFunctionsFromLib( + enum AltMathLibrary AltLib) { + switch (AltLib) { + case TestAltMathLibrary: { + const AltMathDesc AltMathFuncs[] = { + #define TLI_DEFINE_TEST_ALTMATHFUNCS + #include "llvm/Analysis/AltMathLibFuncs.def" + }; + addAltMathFunctions(AltMathFuncs); + break; + } + case NoAltMathLibrary: + break; + } +} + +/// Select an alternate math library implementation that meets the criteria +/// described by an FPBuiltinIntrinsic call. +/// Returns an empty StringRef when the call has no required accuracy or when +/// no entry for the same intrinsic, type and vector width satisfies it. +StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation( + FPBuiltinIntrinsic *Builtin) const { + // TODO: Handle the case of no specified accuracy. + if (Builtin->getRequiredAccuracy() == std::nullopt) + return StringRef(); + AltMathDesc RequiredDesc = {Builtin->getIntrinsicID(), + Builtin->getBaseTypeID(), + Builtin->getElementCount(), + "", Builtin->getRequiredAccuracy().value()}; + std::vector::const_iterator I = + llvm::lower_bound(AltMathFuncDescs, RequiredDesc, compareAltMathDescs); + if (I == AltMathFuncDescs.end()) + return StringRef(); // TODO: Report fatal error? + // No match found for this intrinsic, base type, vector width and accuracy + if (I->IntrinID != Builtin->getIntrinsicID() || + I->BaseFPType != Builtin->getBaseTypeID() || + I->VectorizationFactor != Builtin->getElementCount() || + I->Accuracy > Builtin->getRequiredAccuracy().value()) + return StringRef(); // TODO: Report fatal error? 
+ return I->FnImplName; +} + static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) { return LHS.ScalarFnName < RHS.ScalarFnName; } diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 9bd571af8b47d..1763be78e1a7f 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -62,6 +62,7 @@ add_llvm_component_library(LLVMCodeGen ExpandVectorPredication.cpp FaultMaps.cpp FEntryInserter.cpp + FPBuiltinFnSelection.cpp FinalizeISel.cpp FixupStatepointCallerSaved.cpp FuncletLayout.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 32709091370f1..281a2d211ccf2 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); + initializeFPBuiltinFnSelectionLegacyPassPass(Registry); initializeFinalizeISelPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeFixupStatepointCallerSavedPass(Registry); diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp new file mode 100644 index 0000000000000..74217aa8cd08c --- /dev/null +++ b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp @@ -0,0 +1,181 @@ +//===- FPBuiltinFnSelection.cpp - Pre-ISel intrinsic lowering pass --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements alternate math library implementation selection for +// llvm.fpbuiltin.* intrinsics. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FPBuiltinFnSelection.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; + +#define DEBUG_TYPE "fpbuiltin-fn-selection" + +static bool replaceWithAltMathFunction(FPBuiltinIntrinsic &BuiltinCall, + const StringRef ImplName) { + Module *M = BuiltinCall.getModule(); + + Function *OldFunc = BuiltinCall.getCalledFunction(); + + // Check if the alt math library function is already declared in this module, + // otherwise insert it. + Function *ImplFunc = M->getFunction(ImplName); + if (!ImplFunc) { + ImplFunc = Function::Create(OldFunc->getFunctionType(), + Function::ExternalLinkage, ImplName, *M); + // TODO: Copy non-builtin attributes ImplFunc->copyAttributesFrom(OldFunc); + } + + // Replace the call to the fpbuiltin intrinsic with a call + // to the corresponding function from the alternate math library. + IRBuilder<> IRBuilder(&BuiltinCall); + SmallVector Args(BuiltinCall.args()); + // Preserve the operand bundles. + SmallVector OpBundles; + BuiltinCall.getOperandBundlesAsDefs(OpBundles); + CallInst *Replacement = IRBuilder.CreateCall(ImplFunc, Args, OpBundles); + assert(OldFunc->getFunctionType() == ImplFunc->getFunctionType() && + "Expecting function types to be identical"); + BuiltinCall.replaceAllUsesWith(Replacement); + // TODO: fpbuiltin.sincos won't be reported as an FPMathOperator + // Do we need to do anything about that? + if (isa(Replacement)) { + // Preserve fast math flags for FP math. 
+ Replacement->copyFastMathFlags(&BuiltinCall); + } + + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" + << OldFunc->getName() << "` with call to `" << ImplName + << "`.\n"); + return true; +} + +static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, + FPBuiltinIntrinsic &BuiltinCall) { + LLVM_DEBUG({ + dbgs() << "Selecting an implementation for " + << BuiltinCall.getCalledFunction()->getName() << " with accuracy = "; + if (BuiltinCall.getRequiredAccuracy() == std::nullopt) + dbgs() << "(none)\n"; + else + dbgs() << BuiltinCall.getRequiredAccuracy().value() << "\n"; + }); + + // Only the max-error attribute is handled here; anything else is fatal. + StringSet RecognizedAttrs = { FPBuiltinIntrinsic::FP_MAX_ERROR }; + if (BuiltinCall.hasUnrecognizedFPAttrs(RecognizedAttrs)) { + report_fatal_error( + Twine(BuiltinCall.getCalledFunction()->getName()) + + Twine(" was called with unrecognized floating-point attributes.\n"), + false); + return false; + } + + + + // Ask TLI for an implementation name; an empty name means none was found. + StringRef ImplName = TLI.selectFPBuiltinImplementation(&BuiltinCall); + if (ImplName.empty()) { + LLVM_DEBUG(dbgs() << "No matching implementation found!\n"); + std::string RequiredAccuracy; + if (BuiltinCall.getRequiredAccuracy() == std::nullopt) + RequiredAccuracy = "(none)"; + else + RequiredAccuracy = + formatv("{0}", BuiltinCall.getRequiredAccuracy().value()); + + report_fatal_error(Twine(BuiltinCall.getCalledFunction()->getName()) + + Twine(" was called with required accuracy = ") + + Twine(RequiredAccuracy) + + Twine(" but no suitable implementation was found.\n"), + false); + return false; + } + + LLVM_DEBUG(dbgs() << "Selected " << ImplName << "\n"); + + return replaceWithAltMathFunction(BuiltinCall, ImplName); +} + +static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { + bool Changed = false; + SmallVector ReplacedCalls; + for (auto &I : instructions(F)) { + if (auto *CI = dyn_cast(&I)) { + if (selectFnForFPBuiltinCalls(TLI, *CI)) 
{ + ReplacedCalls.push_back(CI); + Changed = true; + } + } + } + // Erase the calls to the intrinsics that have been replaced + // with calls to the alternate math library. + for (auto *CI : ReplacedCalls) { + CI->eraseFromParent(); + } + return Changed; +} + +namespace { + +class FPBuiltinFnSelectionLegacyPass : public FunctionPass { +public: + static char ID; + + FPBuiltinFnSelectionLegacyPass() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override { + const TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); + + return runImpl(*TLI, F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + } +}; + +} // end anonymous namespace + +char FPBuiltinFnSelectionLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(FPBuiltinFnSelectionLegacyPass, DEBUG_TYPE, + "FPBuiltin Function Selection", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(FPBuiltinFnSelectionLegacyPass, DEBUG_TYPE, + "FPBuiltin Function Selection", false, false) + +FunctionPass *llvm::createFPBuiltinFnSelectionPass() { + return new FPBuiltinFnSelectionLegacyPass; +} + +PreservedAnalyses FPBuiltinFnSelectionPass::run(Function &F, + FunctionAnalysisManager &AM) { + const TargetLibraryInfo &TLI = AM.getResult(F); + bool Changed = runImpl(TLI, F); + if (Changed) { + PreservedAnalyses PA; + PA.preserveSet(); + PA.preserve(); + return PA; + } else { + // The pass did not replace any calls, hence it preserves all analyses. 
+ return PreservedAnalyses::all(); + } +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 115f266a71506..d204aa191e695 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1087,6 +1087,7 @@ bool TargetPassConfig::addISelPasses() { addPass(createExpandLargeDivRemPass()); addPass(createExpandLargeFpConvertPass()); addIRPasses(); + addPass(createFPBuiltinFnSelectionPass()); addCodeGenPrepare(); addPassesToHandleExceptions(); addISelPrepare(); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 6fd59d8070ae4..fd7575c518d53 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -275,6 +275,75 @@ Value *InstrProfIncrementInst::getStep() const { return ConstantInt::get(Type::getInt64Ty(Context), 1); } +Type::TypeID FPBuiltinIntrinsic::getBaseTypeID() const { + // All currently supported FP builtins are characterized by the type of their + // first argument. Since llvm.fpbuiltin.sincos doesn't return a value, using + // the type of the first argument is the most consistent technique. 
+ Type *OperandTy = getArgOperand(0)->getType(); + assert((OperandTy->isFloatingPointTy() || + (OperandTy->isVectorTy() && + OperandTy->getScalarType()->isFloatingPointTy())) && + "Unexpected type for floating point builtin intrinsic!"); + return OperandTy->getScalarType()->getTypeID(); +} + +ElementCount FPBuiltinIntrinsic::getElementCount() const { + Type *OperandTy = getArgOperand(0)->getType(); + assert((OperandTy->isFloatingPointTy() || + (OperandTy->isVectorTy() && + OperandTy->getScalarType()->isFloatingPointTy())) && + "Unexpected type for floating point builtin intrinsic!"); + if (auto *VecTy = dyn_cast(OperandTy)) + return VecTy->getElementCount(); + return ElementCount::getFixed(1); +} + +const std::string FPBuiltinIntrinsic::FPBUILTIN_PREFIX = "fpbuiltin-"; +const std::string FPBuiltinIntrinsic::FP_MAX_ERROR = "fpbuiltin-max-error"; + +std::optional FPBuiltinIntrinsic::getRequiredAccuracy() const { + if (!hasFnAttr(FP_MAX_ERROR)) + return std::nullopt; + // This should be a string attribute with a floating-point value + // If it isn't the IR verifier should report the problem. Here + // we handle that as if the attribute were absent. + // TODO: Create Attribute::getValueAsDouble()? 
+ double Accuracy; + // getAsDouble returns false if it succeeds + if (getFnAttr(FP_MAX_ERROR).getValueAsString().getAsDouble(Accuracy)) + return std::nullopt; + return (float)Accuracy; +} + +bool FPBuiltinIntrinsic::hasUnrecognizedFPAttrs( + const StringSet<> recognizedAttrs) { + AttributeSet FnAttrs = getAttributes().getFnAttrs(); + for (const Attribute &Attr : FnAttrs) { + if (!Attr.isStringAttribute()) + continue; + auto AttrStr = Attr.getKindAsString(); + if (!AttrStr.starts_with(FPBUILTIN_PREFIX)) + continue; + if (!recognizedAttrs.contains(AttrStr)) + return true; + } + return false; +} + +bool FPBuiltinIntrinsic::classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { +#define OPERATION(NAME, INTRINSIC) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/FPBuiltinOps.def" + return true; + default: + return false; + } +} + + + + std::optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 8d4556c6bda1c..f93a863dab4be 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: AArch64 Stack Tagging ; CHECK-NEXT: SME ABI Pass +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Exception handling preparation ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 1e77970721f65..cb272bb306192 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -88,6 +88,7 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: SME ABI Pass +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; 
CHECK-NEXT: Type Promotion diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 9000f10cad3be..633a9b8fe3037 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -50,6 +50,7 @@ ; GCN-O0-NEXT: Expand vector predication intrinsics ; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O0-NEXT: Expand reduction intrinsics +; GCN-O0-NEXT: FPBuiltin Function Selection ; GCN-O0-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O0-NEXT: SYCL Add Implicit Global Offset ; GCN-O0-NEXT: AMDGPU Attributor @@ -228,6 +229,7 @@ ; GCN-O1-NEXT: Expand reduction intrinsics ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: TLS Variable Hoist +; GCN-O1-NEXT: FPBuiltin Function Selection ; GCN-O1-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O1-NEXT: SYCL Add Implicit Global Offset ; GCN-O1-NEXT: AMDGPU Attributor @@ -516,6 +518,7 @@ ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: TLS Variable Hoist ; GCN-O1-OPTS-NEXT: Early CSE +; GCN-O1-OPTS-NEXT: FPBuiltin Function Selection ; GCN-O1-OPTS-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O1-OPTS-NEXT: SYCL Add Implicit Global Offset ; GCN-O1-OPTS-NEXT: AMDGPU Attributor @@ -818,6 +821,7 @@ ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: TLS Variable Hoist ; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: FPBuiltin Function Selection ; GCN-O2-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O2-NEXT: SYCL Add Implicit Global Offset ; GCN-O2-NEXT: AMDGPU Attributor @@ -1133,6 +1137,7 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: FPBuiltin Function Selection ; GCN-O3-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O3-NEXT: SYCL Add Implicit Global Offset ; GCN-O3-NEXT: AMDGPU Attributor diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 2031158017d6b..4e0c43d28c511 100644 
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -50,6 +50,7 @@ ; CHECK-NEXT: Transform functions to use DSP intrinsics ; CHECK-NEXT: Complex Deinterleaving Pass ; CHECK-NEXT: Interleaved Access Pass +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Type Promotion ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-nomatch.ll b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-nomatch.ll new file mode 100644 index 0000000000000..364e4f780eee6 --- /dev/null +++ b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-nomatch.ll @@ -0,0 +1,16 @@ +; RUN: not opt -alt-math-library=test -fpbuiltin-fn-selection -S < %s 2>&1 | FileCheck %s + +; Basic test for fp-builtin intrinsics error handling when no suitable +; implementation is available. + +; CHECK: LLVM ERROR: llvm.fpbuiltin.cos.f32 was called with required accuracy = 0.50 but no suitable implementation was found. + +define void @test_scalar_cr(float %f) { +entry: + %t1 = call float @llvm.fpbuiltin.cos.f32(float %f) #0 + ret void +} + +declare float @llvm.fpbuiltin.cos.f32(float) + +attributes #0 = { "fpbuiltin-max-error"="0.5" } diff --git a/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-unhandled.ll b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-unhandled.ll new file mode 100644 index 0000000000000..47377270d5c23 --- /dev/null +++ b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics-unhandled.ll @@ -0,0 +1,16 @@ +; RUN: not opt -alt-math-library=test -fpbuiltin-fn-selection -S < %s 2>&1 | FileCheck %s + +; Basic test for fp-builtin intrinsics error handling when the callsite +; contains an unrecognized fp attribute. 
+ +; CHECK: LLVM ERROR: llvm.fpbuiltin.cos.f32 was called with unrecognized floating-point attributes + +define void @test_scalar_cr(float %f) { +entry: + %t1 = call float @llvm.fpbuiltin.cos.f32(float %f) #0 + ret void +} + +declare float @llvm.fpbuiltin.cos.f32(float) + +attributes #0 = { "fpbuiltin-unknown"="true" } diff --git a/llvm/test/CodeGen/Generic/fp-builtin-intrinsics.ll b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics.ll new file mode 100644 index 0000000000000..f71e84490dba3 --- /dev/null +++ b/llvm/test/CodeGen/Generic/fp-builtin-intrinsics.ll @@ -0,0 +1,186 @@ +; RUN: opt -alt-math-library=test -fpbuiltin-fn-selection -S < %s | FileCheck %s + +; Basic argument tests for fp-builtin intrinsics. +; Only a few representative functions are tested. + +; CHECK-LABEL: @test_scalar_cr +; CHECK: call half @__test_altmath_sqrth_cr +; CHECK: call half @__test_altmath_rsqrth_cr +; CHECK: call float @__test_altmath_sinf_cr +; CHECK: call float @__test_altmath_sqrtf_cr +; CHECK: call float @__test_altmath_rsqrtf_cr +; CHECK: call double @__test_altmath_sin_cr +; CHECK: call double @__test_altmath_sqrt_cr +; CHECK: call double @__test_altmath_rsqrt_cr +define void @test_scalar_cr(half %h, float %f, double %d) { +entry: + %t1 = call half @llvm.fpbuiltin.sqrt.f16(half %h) #0 + %t2 = call half @llvm.fpbuiltin.rsqrt.f16(half %h) #0 + %t3 = call float @llvm.fpbuiltin.sin.f32(float %f) #0 + %t4 = call float @llvm.fpbuiltin.sqrt.f32(float %f) #0 + %t5 = call float @llvm.fpbuiltin.rsqrt.f32(float %f) #0 + %t6 = call double @llvm.fpbuiltin.sin.f64(double %d) #0 + %t7 = call double @llvm.fpbuiltin.sqrt.f64(double %d) #0 + %t8 = call double @llvm.fpbuiltin.rsqrt.f64(double %d) #0 + ret void +} + +; CHECK-LABEL: @test_scalar_1_0 +; CHECK: call half @__test_altmath_sinh_high +; CHECK: call half @__test_altmath_cosh_high +; CHECK: call float @__test_altmath_sinf_high +; CHECK: call float @__test_altmath_cosf_high +; CHECK: call float @__test_altmath_tanf_high +; CHECK: call 
float @__test_altmath_rsqrtf_high +; CHECK: call double @__test_altmath_sin_high +; CHECK: call double @__test_altmath_cos_high +; CHECK: call double @__test_altmath_tan_high +; CHECK: call double @__test_altmath_rsqrt_high +define void @test_scalar_1_0(half %h, float %f, double %d) { +entry: + %t1 = call half @llvm.fpbuiltin.sin.f16(half %h) #1 + %t2 = call half @llvm.fpbuiltin.cos.f16(half %h) #1 + %t3 = call float @llvm.fpbuiltin.sin.f32(float %f) #1 + %t4 = call float @llvm.fpbuiltin.cos.f32(float %f) #1 + %t5 = call float @llvm.fpbuiltin.tan.f32(float %f) #1 + %t6 = call float @llvm.fpbuiltin.rsqrt.f32(float %f) #1 + %t7 = call double @llvm.fpbuiltin.sin.f64(double %d) #1 + %t8 = call double @llvm.fpbuiltin.cos.f64(double %d) #1 + %t9 = call double @llvm.fpbuiltin.tan.f64(double %d) #1 + %t10 = call double @llvm.fpbuiltin.rsqrt.f64(double %d) #1 + ret void +} + +; CHECK-LABEL: @test_scalar_2_5 +; CHECK: call half @__test_altmath_fdivh_med +; CHECK: call float @__test_altmath_fdivf_med +; CHECK: call float @__test_altmath_sqrtf_med +; CHECK: call double @__test_altmath_fdiv_med +; CHECK: call double @__test_altmath_sqrt_med +define void @test_scalar_2_5(half %h1, half %h2, float %f1, float %f2, + double %d1, double %d2) { +entry: + %t1 = call half @llvm.fpbuiltin.fdiv.f16(half %h1, half %h2) #2 + %t2 = call float @llvm.fpbuiltin.fdiv.f32(float %f1, float %f2) #2 + %t3 = call float @llvm.fpbuiltin.sqrt.f32(float %f1) #2 + %t4 = call double @llvm.fpbuiltin.fdiv.f64(double %d1, double %d2) #2 + %t5 = call double @llvm.fpbuiltin.sqrt.f64(double %d1) #2 + ret void +} + +; CHECK-LABEL: @test_scalar_4_0 +; CHECK: call half @__test_altmath_cosh_med +; CHECK: call float @__test_altmath_cosf_med +; CHECK: call double @__test_altmath_cos_med +define void @test_scalar_4_0(half %h, float %f, double %d) { +entry: + %t1 = call half @llvm.fpbuiltin.cos.f16(half %h) #3 + %t2 = call float @llvm.fpbuiltin.cos.f32(float %f) #3 + %t3 = call double @llvm.fpbuiltin.cos.f64(double %d) 
#3 + ret void +} + +; CHECK-LABEL: @test_scalar_4096 +; CHECK: call float @__test_altmath_rsqrtf_low +; CHECK: call double @__test_altmath_rsqrt_low +define void @test_scalar_4096(float %f, double %d) { +entry: + %t6 = call float @llvm.fpbuiltin.rsqrt.f32(float %f) #4 + %t10 = call double @llvm.fpbuiltin.rsqrt.f64(double %d) #4 + ret void +} + +; CHECK-LABEL: @test_vector_1_0 +; CHECK: call <4 x float> @__test_altmath_sinf4_high +; CHECK: call <4 x float> @__test_altmath_cosf4_high +; CHECK: call <8 x float> @__test_altmath_sinf8_high +; CHECK: call <8 x float> @__test_altmath_cosf8_high +; CHECK: call <2 x double> @__test_altmath_sin2_high +; CHECK: call <2 x double> @__test_altmath_cos2_high +define void @test_vector_1_0(<4 x float> %v4f, <8 x float> %v8f, <2 x double> %vd) { +entry: + %t1 = call <4 x float> @llvm.fpbuiltin.sin.v4f32(<4 x float> %v4f) #1 + %t2 = call <4 x float> @llvm.fpbuiltin.cos.v4f32(<4 x float> %v4f) #1 + %t3 = call <8 x float> @llvm.fpbuiltin.sin.v8f32(<8 x float> %v8f) #1 + %t4 = call <8 x float> @llvm.fpbuiltin.cos.v8f32(<8 x float> %v8f) #1 + %t5 = call <2 x double> @llvm.fpbuiltin.sin.v2f64(<2 x double> %vd) #1 + %t6 = call <2 x double> @llvm.fpbuiltin.cos.v2f64(<2 x double> %vd) #1 + ret void +} + +; TODO: Add a test with different vector sizes of the same base type + + +; Test cases where the only available implementations are more accurate than +; the required accuracy (3.0) +; CHECK-LABEL: @test_scalar_inexact +; CHECK: call half @__test_altmath_fdivh_med +; CHECK: call half @__test_altmath_sinh_high +; CHECK: call half @__test_altmath_cosh_high +; CHECK: call half @__test_altmath_sqrth_cr +; CHECK: call half @__test_altmath_rsqrth_cr +; CHECK: call float @__test_altmath_fdivf_med +; CHECK: call float @__test_altmath_sinf_high +; CHECK: call float @__test_altmath_cosf_high +; CHECK: call float @__test_altmath_tanf_high +; CHECK: call float @__test_altmath_sqrtf_med +; CHECK: call float @__test_altmath_rsqrtf_high +; CHECK: call
double @__test_altmath_fdiv_med +; CHECK: call double @__test_altmath_sin_high +; CHECK: call double @__test_altmath_cos_high +; CHECK: call double @__test_altmath_tan_high +; CHECK: call double @__test_altmath_sqrt_med +; CHECK: call double @__test_altmath_rsqrt_high +define void @test_scalar_inexact(half %h1, half %h2, float %f1, float %f2, + double %d1, double %d2) { +entry: + %t1 = call half @llvm.fpbuiltin.fdiv.f16(half %h1, half %h2) #5 + %t2 = call half @llvm.fpbuiltin.sin.f16(half %h1) #5 + %t3 = call half @llvm.fpbuiltin.cos.f16(half %h1) #5 + %t4 = call half @llvm.fpbuiltin.sqrt.f16(half %h1) #5 + %t5 = call half @llvm.fpbuiltin.rsqrt.f16(half %h1) #5 + %t6 = call float @llvm.fpbuiltin.fdiv.f32(float %f1, float %f2) #5 + %t7 = call float @llvm.fpbuiltin.sin.f32(float %f1) #5 + %t8 = call float @llvm.fpbuiltin.cos.f32(float %f1) #5 + %t9 = call float @llvm.fpbuiltin.tan.f32(float %f1) #5 + %t10 = call float @llvm.fpbuiltin.sqrt.f32(float %f1) #5 + %t11 = call float @llvm.fpbuiltin.rsqrt.f32(float %f1) #5 + %t12 = call double @llvm.fpbuiltin.fdiv.f64(double %d1, double %d2) #5 + %t13 = call double @llvm.fpbuiltin.sin.f64(double %d1) #5 + %t14 = call double @llvm.fpbuiltin.cos.f64(double %d1) #5 + %t15 = call double @llvm.fpbuiltin.tan.f64(double %d1) #5 + %t16 = call double @llvm.fpbuiltin.sqrt.f64(double %d1) #5 + %t17 = call double @llvm.fpbuiltin.rsqrt.f64(double %d1) #5 + ret void +} + +declare half @llvm.fpbuiltin.fdiv.f16(half, half) +declare half @llvm.fpbuiltin.sin.f16(half) +declare half @llvm.fpbuiltin.cos.f16(half) +declare half @llvm.fpbuiltin.sqrt.f16(half) +declare half @llvm.fpbuiltin.rsqrt.f16(half) +declare float @llvm.fpbuiltin.fdiv.f32(float, float) +declare float @llvm.fpbuiltin.sin.f32(float) +declare float @llvm.fpbuiltin.cos.f32(float) +declare float @llvm.fpbuiltin.tan.f32(float) +declare float @llvm.fpbuiltin.sqrt.f32(float) +declare float @llvm.fpbuiltin.rsqrt.f32(float) +declare double @llvm.fpbuiltin.fdiv.f64(double, double) 
+declare double @llvm.fpbuiltin.sin.f64(double) +declare double @llvm.fpbuiltin.cos.f64(double) +declare double @llvm.fpbuiltin.tan.f64(double) +declare double @llvm.fpbuiltin.sqrt.f64(double) +declare double @llvm.fpbuiltin.rsqrt.f64(double) +declare <4 x float> @llvm.fpbuiltin.sin.v4f32(<4 x float>) +declare <4 x float> @llvm.fpbuiltin.cos.v4f32(<4 x float>) +declare <8 x float> @llvm.fpbuiltin.sin.v8f32(<8 x float>) +declare <8 x float> @llvm.fpbuiltin.cos.v8f32(<8 x float>) +declare <2 x double> @llvm.fpbuiltin.sin.v2f64(<2 x double>) +declare <2 x double> @llvm.fpbuiltin.cos.v2f64(<2 x double>) + +attributes #0 = { "fpbuiltin-max-error"="0.5" } +attributes #1 = { "fpbuiltin-max-error"="1.0" } +attributes #2 = { "fpbuiltin-max-error"="2.5" } +attributes #3 = { "fpbuiltin-max-error"="4.0" } +attributes #4 = { "fpbuiltin-max-error"="4096.0" } +attributes #5 = { "fpbuiltin-max-error"="3.0" } diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index b4419e629bd2a..514627998bb55 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -69,6 +69,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index beb2ace4b809e..24ed5ec094e19 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: Expand vector predication intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Exception handling preparation ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors diff 
--git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 76173bf8bc92d..fafd33d601293 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -61,6 +61,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 959add648245f..87605d2d9e4fd 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -31,6 +31,7 @@ ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand indirectbr instructions +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Exception handling preparation ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 144c2da7afb29..780f1aad4f92f 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -68,6 +68,7 @@ ; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: X86 Partial Reduction ; CHECK-NEXT: Expand indirectbr instructions +; CHECK-NEXT: FPBuiltin Function Selection ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 0b0cf772ade73..c6b30d290f1d1 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -366,24 +366,24 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "amdgcn-", "polly-", "riscv-", "dxil-"}; std::vector PassNameContain = {"ehprepare"}; std::vector PassNameExact = { - "safe-stack", "cost-model", - "codegenprepare", 
"interleaved-load-combine", - "unreachableblockelim", "verify-safepoint-ir", - "atomic-expand", "expandvp", - "hardware-loops", "type-promotion", - "mve-tail-predication", "interleaved-access", - "global-merge", "pre-isel-intrinsic-lowering", - "expand-reductions", "indirectbr-expand", - "generic-to-nvvm", "expandmemcmp", - "loop-reduce", "lower-amx-type", - "pre-amx-config", "lower-amx-intrinsics", - "polyhedral-info", "print-polyhedral-info", - "replace-with-veclib", "jmc-instrument", - "dot-regions", "dot-regions-only", - "view-regions", "view-regions-only", - "select-optimize", "expand-large-div-rem", - "structurizecfg", "fix-irreducible", - "expand-large-fp-convert"}; + "safe-stack", "cost-model", + "codegenprepare", "interleaved-load-combine", + "unreachableblockelim", "verify-safepoint-ir", + "atomic-expand", "expandvp", + "hardware-loops", "type-promotion", + "mve-tail-predication", "interleaved-access", + "global-merge", "pre-isel-intrinsic-lowering", + "expand-reductions", "indirectbr-expand", + "generic-to-nvvm", "expandmemcmp", + "loop-reduce", "lower-amx-type", + "pre-amx-config", "lower-amx-intrinsics", + "polyhedral-info", "print-polyhedral-info", + "replace-with-veclib", "jmc-instrument", + "dot-regions", "dot-regions-only", + "view-regions", "view-regions-only", + "select-optimize", "expand-large-div-rem", + "structurizecfg", "fix-irreducible", + "expand-large-fp-convert", "fpbuiltin-fn-selection"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -464,6 +464,7 @@ int main(int argc, char **argv) { initializeESIMDVerifierPass(Registry); initializeSYCLLowerWGLocalMemoryLegacyPass(Registry); initializeSYCLMutatePrintfAddrspaceLegacyPassPass(Registry); + initializeFPBuiltinFnSelectionLegacyPassPass(Registry); #ifdef BUILD_EXAMPLES initializeExampleIRTransforms(Registry); From 74fd8ec78b2dcc6f12fa963ec81c08241bb13437 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Mon, 30 Jan 2023 15:45:19 -0800 Subject: [PATCH 2/7] Fix 
AMDGPU pipeline test --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 633a9b8fe3037..41fc5766e2152 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -50,9 +50,10 @@ ; GCN-O0-NEXT: Expand vector predication intrinsics ; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O0-NEXT: Expand reduction intrinsics -; GCN-O0-NEXT: FPBuiltin Function Selection ; GCN-O0-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O0-NEXT: SYCL Add Implicit Global Offset +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: FPBuiltin Function Selection ; GCN-O0-NEXT: AMDGPU Attributor ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Cycle Info Analysis @@ -229,9 +230,10 @@ ; GCN-O1-NEXT: Expand reduction intrinsics ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: TLS Variable Hoist -; GCN-O1-NEXT: FPBuiltin Function Selection ; GCN-O1-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O1-NEXT: SYCL Add Implicit Global Offset +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: FPBuiltin Function Selection ; GCN-O1-NEXT: AMDGPU Attributor ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Cycle Info Analysis @@ -518,9 +520,10 @@ ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: TLS Variable Hoist ; GCN-O1-OPTS-NEXT: Early CSE -; GCN-O1-OPTS-NEXT: FPBuiltin Function Selection ; GCN-O1-OPTS-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O1-OPTS-NEXT: SYCL Add Implicit Global Offset +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: FPBuiltin Function Selection ; GCN-O1-OPTS-NEXT: AMDGPU Attributor ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Cycle Info Analysis @@ -821,9 +824,10 @@ ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: TLS Variable Hoist ; GCN-O2-NEXT: Early CSE -; GCN-O2-NEXT: FPBuiltin Function 
Selection ; GCN-O2-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O2-NEXT: SYCL Add Implicit Global Offset +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: FPBuiltin Function Selection ; GCN-O2-NEXT: AMDGPU Attributor ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Cycle Info Analysis @@ -1137,9 +1141,10 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering -; GCN-O3-NEXT: FPBuiltin Function Selection ; GCN-O3-NEXT: SYCL Local Accessor to Shared Memory ; GCN-O3-NEXT: SYCL Add Implicit Global Offset +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: FPBuiltin Function Selection ; GCN-O3-NEXT: AMDGPU Attributor ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Cycle Info Analysis From 6b0f579fd14a20f33f204ac7ec1c4c6b1f773490 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Tue, 31 Jan 2023 16:56:30 -0800 Subject: [PATCH 3/7] Address review comments --- llvm/lib/CodeGen/FPBuiltinFnSelection.cpp | 3 --- llvm/lib/IR/IntrinsicInst.cpp | 3 --- 2 files changed, 6 deletions(-) diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp index 74217aa8cd08c..5733e6818c3f2 100644 --- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp @@ -74,7 +74,6 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, dbgs() << BuiltinCall.getRequiredAccuracy().value() << "\n"; }); -// if (BuiltinCall->hasUnecognizedFPAttrs.push_back(); StringSet RecognizedAttrs = { FPBuiltinIntrinsic::FP_MAX_ERROR }; if (BuiltinCall.hasUnrecognizedFPAttrs(RecognizedAttrs)) { report_fatal_error( @@ -84,8 +83,6 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, return false; } - - /// Call TLI to select a function implementation to call StringRef ImplName = TLI.selectFPBuiltinImplementation(&BuiltinCall); if (ImplName.empty()) { diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 
fd7575c518d53..0a319b5b8acbc 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -341,9 +341,6 @@ bool FPBuiltinIntrinsic::classof(const IntrinsicInst *I) { } } - - - std::optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; From a2f2ab6da261a41d0464793c3d71a1978f0953a6 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Wed, 1 Feb 2023 15:45:06 -0800 Subject: [PATCH 4/7] More changes to address review feedback --- llvm/docs/LangRef.rst | 6 +-- .../include/llvm/Analysis/AltMathLibFuncs.def | 2 +- llvm/include/llvm/IR/IntrinsicInst.h | 2 +- llvm/lib/IR/IntrinsicInst.cpp | 8 +-- llvm/tools/opt/opt.cpp | 54 ++++++++++++------- 5 files changed, 46 insertions(+), 26 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ae8b2f5b0e42e..f036c6eadf1d9 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -22962,12 +22962,12 @@ new callsite attributes beginning with "fpbuiltin-". All such string attribute identifiers are considered reserved for use with fpbuiltin intrinsics. No transformation should be performed on any fpbuiltin intrinsic if the -intrinsic has any callsite attributes begining with "fpbuiltin-" that that code +intrinsic has any callsite attributes beginning with "fpbuiltin-" that the code performing the transformation does not recognize. Unless otherwise specified using callsite attributes, the fpbuiltin intrinsics -do not set ``errno`` or and may be assumed not to trap or raise floating-point -exceptions. +do not set ``errno`` and are not guaranteed to maintain correct +floating-point exception state. All fpbuiltin intrinsics are overloaded intrinsics which may operate on any scalar or vector floating-point type. Not all targets support all types.
diff --git a/llvm/include/llvm/Analysis/AltMathLibFuncs.def b/llvm/include/llvm/Analysis/AltMathLibFuncs.def index 7c37fc9736437..538fc3da60e28 100644 --- a/llvm/include/llvm/Analysis/AltMathLibFuncs.def +++ b/llvm/include/llvm/Analysis/AltMathLibFuncs.def @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// This .def file will create descriptions of available fpbuilt math library +// This .def file will create descriptions of available fpbuiltin math library // function implementations and their constraining attributes. The current // support is limited to a fake test library for verifying the infrastructure. // The fake implementation can be removed when a real implementation is diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index fad8e1f812bc9..1d65d47ae4641 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -596,7 +596,7 @@ class VPCmpIntrinsic : public VPIntrinsic { class FPBuiltinIntrinsic : public IntrinsicInst { public: static const std::string FPBUILTIN_PREFIX; - static const std::string FP_MAX_ERROR; + static const std::string FPBUILTIN_MAX_ERROR; std::optional getRequiredAccuracy() const; diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 0a319b5b8acbc..95e1a354488ab 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include +#include using namespace llvm; @@ -299,10 +300,11 @@ ElementCount FPBuiltinIntrinsic::getElementCount() const { } const std::string FPBuiltinIntrinsic::FPBUILTIN_PREFIX = "fpbuiltin-"; -const std::string FPBuiltinIntrinsic::FP_MAX_ERROR = "fpbuiltin-max-error"; +const std::string FPBuiltinIntrinsic::FPBUILTIN_MAX_ERROR = + "fpbuiltin-max-error"; std::optional FPBuiltinIntrinsic::getRequiredAccuracy() const { - if (!hasFnAttr(FP_MAX_ERROR)) + if 
(!hasFnAttr(FPBUILTIN_MAX_ERROR)) return std::nullopt; // This should be a string attribute with a floating-point value // If it isn't the IR verifier should report the problem. Here @@ -310,7 +312,7 @@ std::optional FPBuiltinIntrinsic::getRequiredAccuracy() const { // TODO: Create Attribute::getValueAsDouble()? double Accuracy; // getAsDouble returns false if it succeeds - if (getFnAttr(FP_MAX_ERROR).getValueAsString().getAsDouble(Accuracy)) + if (getFnAttr(FPBUILTIN_MAX_ERROR).getValueAsString().getAsDouble(Accuracy)) return std::nullopt; return (float)Accuracy; } diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index c6b30d290f1d1..686f3ff204b00 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -366,24 +366,42 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "amdgcn-", "polly-", "riscv-", "dxil-"}; std::vector PassNameContain = {"ehprepare"}; std::vector PassNameExact = { - "safe-stack", "cost-model", - "codegenprepare", "interleaved-load-combine", - "unreachableblockelim", "verify-safepoint-ir", - "atomic-expand", "expandvp", - "hardware-loops", "type-promotion", - "mve-tail-predication", "interleaved-access", - "global-merge", "pre-isel-intrinsic-lowering", - "expand-reductions", "indirectbr-expand", - "generic-to-nvvm", "expandmemcmp", - "loop-reduce", "lower-amx-type", - "pre-amx-config", "lower-amx-intrinsics", - "polyhedral-info", "print-polyhedral-info", - "replace-with-veclib", "jmc-instrument", - "dot-regions", "dot-regions-only", - "view-regions", "view-regions-only", - "select-optimize", "expand-large-div-rem", - "structurizecfg", "fix-irreducible", - "expand-large-fp-convert", "fpbuiltin-fn-selection"}; + "safe-stack", + "cost-model", + "codegenprepare", + "interleaved-load-combine", + "unreachableblockelim", + "verify-safepoint-ir", + "atomic-expand", + "expandvp", + "hardware-loops", + "type-promotion", + "mve-tail-predication", + "interleaved-access", + "global-merge", + "pre-isel-intrinsic-lowering", + 
"expand-reductions", + "indirectbr-expand", + "generic-to-nvvm", + "expandmemcmp", + "loop-reduce", + "lower-amx-type", + "pre-amx-config", + "lower-amx-intrinsics", + "polyhedral-info", + "print-polyhedral-info", + "replace-with-veclib", + "jmc-instrument", + "dot-regions", + "dot-regions-only", + "view-regions", + "view-regions-only", + "select-optimize", + "expand-large-div-rem", + "structurizecfg", + "fix-irreducible", + "expand-large-fp-convert", + "fpbuiltin-fn-selection"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; From d06738a47648622929aff2e889e213448419f6f2 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 2 Feb 2023 10:39:07 -0800 Subject: [PATCH 5/7] clang-format fixes --- llvm/include/llvm/Analysis/TargetLibraryInfo.h | 4 ++-- llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h | 3 +-- llvm/include/llvm/IR/IntrinsicInst.h | 2 +- llvm/include/llvm/InitializePasses.h | 2 +- llvm/lib/Analysis/TargetLibraryInfo.cpp | 16 +++++++--------- llvm/lib/CodeGen/FPBuiltinFnSelection.cpp | 13 +++++++------ llvm/lib/IR/IntrinsicInst.cpp | 7 +++---- 7 files changed, 22 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index cf5e69474f562..3a025df53f7c7 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -119,8 +119,8 @@ class TargetLibraryInfoImpl { /// and then used by addAltMathFunctionsFromLib for populating the tables of /// math function implementations. 
enum AltMathLibrary { - NoAltMathLibrary, // Don't use any alternate math library - TestAltMathLibrary // Use a fake alternate math library for testing + NoAltMathLibrary, // Don't use any alternate math library + TestAltMathLibrary // Use a fake alternate math library for testing }; TargetLibraryInfoImpl(); diff --git a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h b/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h index 6fc04a63c15fe..9a27dbe3da38f 100644 --- a/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h +++ b/llvm/include/llvm/CodeGen/FPBuiltinFnSelection.h @@ -19,8 +19,7 @@ namespace llvm { class Module; -struct FPBuiltinFnSelectionPass - : PassInfoMixin { +struct FPBuiltinFnSelectionPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index b4c9119223b27..8c021188b6963 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -617,7 +617,7 @@ class FPBuiltinIntrinsic : public IntrinsicInst { /// Methods for support type inquiry through isa, cast, and dyn_cast: /// @{ - static bool classof(const IntrinsicInst *I); + static bool classof(const IntrinsicInst *I); static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index b08a501ffb0e2..9b156db2f6bde 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -136,7 +136,7 @@ void initializeExpandVectorPredicationPass(PassRegistry &); void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeFEntryInserterPass(PassRegistry&); -void initializeFPBuiltinFnSelectionLegacyPassPass(PassRegistry&); +void initializeFPBuiltinFnSelectionLegacyPassPass(PassRegistry &); void initializeFinalizeISelPass(PassRegistry&); void 
initializeFinalizeMachineBundlesPass(PassRegistry&); void initializeFixIrreduciblePass(PassRegistry &); diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 8ea6794287df4..3e359c6729e51 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1165,8 +1165,7 @@ static bool compareAltMathDescs(const AltMathDesc &LHS, if (LHS.VectorizationFactor.isScalable() != RHS.VectorizationFactor.isScalable()) return LHS.VectorizationFactor.isScalable() > - RHS.VectorizationFactor - .isScalable(); + RHS.VectorizationFactor.isScalable(); // For non-scaleable vectors, this will be the fixed size // For scaleable vectors, it's the size that's multiplied by the vscale return LHS.VectorizationFactor.getKnownMinValue() < @@ -1186,8 +1185,8 @@ void TargetLibraryInfoImpl::addAltMathFunctionsFromLib( switch (AltLib) { case TestAltMathLibrary: { const AltMathDesc AltMathFuncs[] = { - #define TLI_DEFINE_TEST_ALTMATHFUNCS - #include "llvm/Analysis/AltMathLibFuncs.def" +#define TLI_DEFINE_TEST_ALTMATHFUNCS +#include "llvm/Analysis/AltMathLibFuncs.def" }; addAltMathFunctions(AltMathFuncs); break; @@ -1204,14 +1203,13 @@ StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation( // TODO: Handle the case of no specified accuracy. if (Builtin->getRequiredAccuracy() == std::nullopt) return StringRef(); - AltMathDesc RequiredDesc = {Builtin->getIntrinsicID(), - Builtin->getBaseTypeID(), - Builtin->getElementCount(), - "", Builtin->getRequiredAccuracy().value()}; + AltMathDesc RequiredDesc = { + Builtin->getIntrinsicID(), Builtin->getBaseTypeID(), + Builtin->getElementCount(), "", Builtin->getRequiredAccuracy().value()}; std::vector::const_iterator I = llvm::lower_bound(AltMathFuncDescs, RequiredDesc, compareAltMathDescs); if (I == AltMathFuncDescs.end()) - return StringRef(); // TODO: Report fatal error? + return StringRef(); // TODO: Report fatal error? 
// No match found if (I->IntrinID != Builtin->getIntrinsicID() || I->BaseFPType != Builtin->getBaseTypeID() || diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp index 5733e6818c3f2..b40d652cb7405 100644 --- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp @@ -74,7 +74,7 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, dbgs() << BuiltinCall.getRequiredAccuracy().value() << "\n"; }); - StringSet RecognizedAttrs = { FPBuiltinIntrinsic::FP_MAX_ERROR }; + StringSet RecognizedAttrs = {FPBuiltinIntrinsic::FP_MAX_ERROR}; if (BuiltinCall.hasUnrecognizedFPAttrs(RecognizedAttrs)) { report_fatal_error( Twine(BuiltinCall.getCalledFunction()->getName()) + @@ -94,11 +94,12 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, RequiredAccuracy = formatv("{0}", BuiltinCall.getRequiredAccuracy().value()); - report_fatal_error(Twine(BuiltinCall.getCalledFunction()->getName()) + - Twine(" was called with required accuracy = ") + - Twine(RequiredAccuracy) + - Twine(" but no suitable implementation was found.\n"), - false); + report_fatal_error( + Twine(BuiltinCall.getCalledFunction()->getName()) + + Twine(" was called with required accuracy = ") + + Twine(RequiredAccuracy) + + Twine(" but no suitable implementation was found.\n"), + false); return false; } diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index f1c29901f9417..db1400be941ca 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -295,7 +295,7 @@ Type::TypeID FPBuiltinIntrinsic::getBaseTypeID() const { assert((OperandTy->isFloatingPointTy() || (OperandTy->isVectorTy() && OperandTy->getScalarType()->isFloatingPointTy())) && - "Unexpected type for floating point builtin intrinsic!"); + "Unexpected type for floating point builtin intrinsic!"); return OperandTy->getScalarType()->getTypeID(); } @@ -304,7 +304,7 @@ ElementCount 
FPBuiltinIntrinsic::getElementCount() const { assert((OperandTy->isFloatingPointTy() || (OperandTy->isVectorTy() && OperandTy->getScalarType()->isFloatingPointTy())) && - "Unexpected type for floating point builtin intrinsic!"); + "Unexpected type for floating point builtin intrinsic!"); if (auto *VecTy = dyn_cast(OperandTy)) return VecTy->getElementCount(); return ElementCount::getFixed(1); @@ -345,8 +345,7 @@ bool FPBuiltinIntrinsic::hasUnrecognizedFPAttrs( bool FPBuiltinIntrinsic::classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { -#define OPERATION(NAME, INTRINSIC) \ - case Intrinsic::INTRINSIC: +#define OPERATION(NAME, INTRINSIC) case Intrinsic::INTRINSIC: #include "llvm/IR/FPBuiltinOps.def" return true; default: From eabaf18f6abf8913871b4470cbb4ec3e6b5808d0 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 2 Feb 2023 10:53:11 -0800 Subject: [PATCH 6/7] Fix a trailing whitespace issue --- llvm/lib/CodeGen/FPBuiltinFnSelection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp index b40d652cb7405..7ba7952356e4e 100644 --- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp @@ -97,7 +97,7 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, report_fatal_error( Twine(BuiltinCall.getCalledFunction()->getName()) + Twine(" was called with required accuracy = ") + - Twine(RequiredAccuracy) + + Twine(RequiredAccuracy) + Twine(" but no suitable implementation was found.\n"), false); return false; From fcf7028bfadf1de2becf1d360c96c42a23106278 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 2 Feb 2023 12:38:34 -0800 Subject: [PATCH 7/7] Fix build error --- llvm/lib/CodeGen/FPBuiltinFnSelection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp index 7ba7952356e4e..0497cd7ab92ef 100644 
--- a/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp +++ b/llvm/lib/CodeGen/FPBuiltinFnSelection.cpp @@ -74,7 +74,7 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, dbgs() << BuiltinCall.getRequiredAccuracy().value() << "\n"; }); - StringSet RecognizedAttrs = {FPBuiltinIntrinsic::FP_MAX_ERROR}; + StringSet RecognizedAttrs = {FPBuiltinIntrinsic::FPBUILTIN_MAX_ERROR}; if (BuiltinCall.hasUnrecognizedFPAttrs(RecognizedAttrs)) { report_fatal_error( Twine(BuiltinCall.getCalledFunction()->getName()) +