intel · bader · Feb 3, 2023 · Jan 26, 2023 · Jan 30, 2023 · Feb 1, 2023
@@ -0,0 +1,82 @@
+//===-- AltMathLibFuncs.def - Library information ---------*- C++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file will create descriptions of available fpbuilt math library
+// function implementations and their constraining attributes. The current
+// support is limited to a fake test library for verifying the infrastructure.
+// The fake implementation can be removed when a real implementation is
+// available.
+
+// An accuracy of 0.5 indicates that the result is exact or correctly rounded.
+
+#define FIXED(NL) ElementCount::getFixed(NL)
+#define SCALABLE(NL) ElementCount::getScalable(NL)
+
+#if !(defined(TLI_DEFINE_ALTMATHFUNC))
+#define TLI_DEFINE_ALTMATHFUNC(IID, TYPE, VECSIZE, NAME, ACCURACY)  \
+    {IID, TYPE, VECSIZE, NAME, ACCURACY},
+#endif
+
+
+#if defined(TLI_DEFINE_TEST_ALTMATHFUNCS)
+
+// Just define a few examples to test the infrastructure
+
+// TEST_ALTMATH_LIB Half precision implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::HalfTyID, FIXED(1), "__test_altmath_fdivh_med", 2.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::HalfTyID, FIXED(1), "__test_altmath_sinh_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::HalfTyID, FIXED(1), "__test_altmath_cosh_med", 4.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::HalfTyID, FIXED(1), "__test_altmath_sqrth_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::HalfTyID, FIXED(1), "__test_altmath_rsqrth_cr", 0.5)
+
+// TEST_ALTMATH_LIB Single precision implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::FloatTyID, FIXED(1), "__test_altmath_fdivf_med", 2.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(1), "__test_altmath_sinf_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(1), "__test_altmath_cosf_med", 4.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::FloatTyID, FIXED(1), "__test_altmath_tanf_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::FloatTyID, FIXED(1), "__test_altmath_sqrtf_med", 2.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::FloatTyID, FIXED(1), "__test_altmath_rsqrtf_low", 4096.0)
+
+// TEST_ALTMATH_LIB Double precision implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_fdiv, Type::DoubleTyID, FIXED(1), "__test_altmath_fdiv_med", 2.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(1), "__test_altmath_sin_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(1), "__test_altmath_cos_med", 4.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_tan, Type::DoubleTyID, FIXED(1), "__test_altmath_tan_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_sqrt_med", 2.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_cr", 0.5)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_rsqrt, Type::DoubleTyID, FIXED(1), "__test_altmath_rsqrt_low", 4096.0)
+
+// TEST_ALTMATH_LIB 4 x float implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(4), "__test_altmath_sinf4_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(4), "__test_altmath_cosf4_high", 1.0)
+
+// TEST_ALTMATH_LIB 8 x float implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::FloatTyID, FIXED(8), "__test_altmath_sinf8_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::FloatTyID, FIXED(8), "__test_altmath_cosf8_high", 1.0)
+
+// TEST_ALTMATH_LIB 2 x double implementations
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_sin, Type::DoubleTyID, FIXED(2), "__test_altmath_sin2_high", 1.0)
+TLI_DEFINE_ALTMATHFUNC(Intrinsic::fpbuiltin_cos, Type::DoubleTyID, FIXED(2), "__test_altmath_cos2_high", 1.0)
+
+
+#endif
+
+
+
+#undef TLI_DEFINE_ALTMATHFUNC
+#undef TLI_DEFINE_TEST_ALTMATHFUNCS
@@ -12,6 +12,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include <optional>
@@ -23,6 +24,15 @@ class Function;
 class Module;
 class Triple;
 
+/// Describes a possible implementation of a floating point builtin operation
+struct AltMathDesc {
+  Intrinsic::ID IntrinID;
+  Type::TypeID BaseFPType;
+  ElementCount VectorizationFactor;
+  StringRef FnImplName;
+  float Accuracy;
+};
+
 /// Describes a possible vectorization of a function.
 /// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized
 /// by a factor 'VectorizationFactor'.
@@ -68,6 +78,10 @@ class TargetLibraryInfoImpl {
     return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
   }
 
+  /// Alternate math library functions - sorted by intrinsic ID, then type,
+  ///   then vector size, then accuracy
+  std::vector<AltMathDesc> AltMathFuncDescs;
+
   /// Vectorization descriptors - sorted by ScalarFnName.
   std::vector<VecDesc> VectorDescs;
   /// Scalarization descriptors - same content as VectorDescs but sorted based
@@ -96,6 +110,19 @@ class TargetLibraryInfoImpl {
     SVML              // Intel short vector math library.
   };
 
+  /// List of known alternate math libraries.
+  ///
+  /// The alternate math library provides a set of functions that can ve used
+  /// to replace llvm.fpbuiltin intrinsic calls when one or more constraining
+  /// attributes are specified.
+  /// The library can be specified by either frontend or a commandline option,
+  /// and then used by addAltMathFunctionsFromLib for populating the tables of
+  /// math function implementations.
+  enum AltMathLibrary {
+    NoAltMathLibrary,   // Don't use any alternate math library
+    TestAltMathLibrary  // Use a fake alternate math library for testing
+  };
+
   TargetLibraryInfoImpl();
   explicit TargetLibraryInfoImpl(const Triple &T);
 
@@ -147,6 +174,19 @@ class TargetLibraryInfoImpl {
   /// This can be used for options like -fno-builtin.
   void disableAllFunctions();
 
+  /// Add a set of alternate math library function implementations with
+  /// attributes that can be used to select an implementation for an
+  /// llvm.fpbuiltin intrinsic
+  void addAltMathFunctions(ArrayRef<AltMathDesc> Fns);
+
+  /// Calls addAltMathFunctions with a known preset of functions for the
+  /// given alternate math library.
+  void addAltMathFunctionsFromLib(enum AltMathLibrary AltLib);
+
+  /// Select an alternate math library implementation that meets the criteria
+  /// described by an FPBuiltinIntrinsic call.
+  StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const;
+
   /// Add a set of scalar -> vector mappings, queryable via
   /// getVectorizedFunction and getScalarizedFunction.
   void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
@@ -343,6 +383,9 @@ class TargetLibraryInfo {
   bool isFunctionVectorizable(StringRef F) const {
     return Impl->isFunctionVectorizable(F);
   }
+  StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const {
+    return Impl->selectFPBuiltinImplementation(Builtin);
+  }
   StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
     return Impl->getVectorizedFunction(F, VF);
   }

@@ -23,6 +23,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/FPBuiltinFnSelection.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -582,6 +583,7 @@ void CodeGenPassBuilder<Derived>::addISelPasses(AddIRPass &addPass) const {
   addPass(PreISelIntrinsicLoweringPass());
 
   derived().addIRPasses(addPass);
+  addPass(FPBuiltinFnSelectionPass());
   derived().addCodeGenPrepare(addPass);
   addPassesToHandleExceptions(addPass);
   derived().addISelPrepare(addPass);

@@ -0,0 +1,29 @@
+//===- FPBuiltinFnSelection.h - Pre-ISel intrinsic lowering pass ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements alternate math library implementation selection for
+// llvm.fpbuiltin.* intrinsics.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_FPBUILTINFNSELECTION_H
+#define LLVM_CODEGEN_FPBUILTINFNSELECTION_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+struct FPBuiltinFnSelectionPass
+    : PassInfoMixin<FPBuiltinFnSelectionPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_FPBUILTINFNSELECTION_H
@@ -39,6 +39,7 @@ FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
 FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
 FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
+FUNCTION_PASS("fpbuiltin-fn-selection", FPBuiltinFnSelectionPass, ())
 FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
 FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
 FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))

@@ -450,6 +450,10 @@ namespace llvm {
   /// evaluation.
   ModulePass *createPreISelIntrinsicLoweringPass();
 
+  /// This pass lowers the \@llvm.fpbuiltin.{operation} intrinsics to
+  /// matching library function calls based on call site attributes.
+  FunctionPass *createFPBuiltinFnSelectionPass();
+
   /// GlobalMerge - This pass merges internal (by default) globals into structs
   /// to enable reuse of a base pointer by indexed addressing modes.
   /// It can also be configured to focus on size optimizations only.

@@ -0,0 +1,59 @@
+//===--- llvm/IR/FPBuiltinOps.def - Constrained intrinsics ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines properties of floating point builtin intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPERATION
+#define OPERATION(N,I)
+#endif
+
+// Arguments of the entries are:
+// - operation name.
+// - name of the fpbuiltin intrinsic to represent this operation.
+
+// These are definitions for instructions, that are converted into constrained
+// intrinsics.
+//
+OPERATION(FAdd,   fpbuiltin_fadd)
+OPERATION(FSub,   fpbuiltin_fsub)
+OPERATION(FMul,   fpbuiltin_fmul)
+OPERATION(FDiv,   fpbuiltin_fdiv)
+OPERATION(FRem,   fpbuiltin_frem)
+OPERATION(Sin,    fpbuiltin_sin)
+OPERATION(Cos,    fpbuiltin_cos)
+OPERATION(Tan,    fpbuiltin_tan)
+OPERATION(Sinh,   fpbuiltin_sinh)
+OPERATION(Cosh,   fpbuiltin_cosh)
+OPERATION(Tanh,   fpbuiltin_tanh)
+OPERATION(Asin,   fpbuiltin_asin)
+OPERATION(Acos,   fpbuiltin_acos)
+OPERATION(Atan,   fpbuiltin_atan)
+OPERATION(Atan2,  fpbuiltin_atan2)
+OPERATION(Asinh,  fpbuiltin_asinh)
+OPERATION(Acosh,  fpbuiltin_acosh)
+OPERATION(Atanh,  fpbuiltin_atanh)
+OPERATION(Exp,    fpbuiltin_exp)
+OPERATION(Exp2,   fpbuiltin_exp2)
+OPERATION(Exp10,  fpbuiltin_exp10)
+OPERATION(Expm1,  fpbuiltin_expm1)
+OPERATION(Log,    fpbuiltin_log)
+OPERATION(Log2,   fpbuiltin_log2)
+OPERATION(Log10,  fpbuiltin_log10)
+OPERATION(Log1p,  fpbuiltin_log1p)
+OPERATION(Hypot,  fpbuiltin_hypot)
+OPERATION(Pow,    fpbuiltin_pow)
+OPERATION(Ldexp,  fpbuiltin_ldexp)
+OPERATION(Sqrt,   fpbuiltin_sqrt)
+OPERATION(Rsqrt,  fpbuiltin_rsqrt)
+OPERATION(Erf,    fpbuiltin_erf)
+OPERATION(Erfc,   fpbuiltin_erfc)
+OPERATION(Sincos, fpbuiltin_sincos)
+
+#undef OPERATION
@@ -23,6 +23,7 @@
 #ifndef LLVM_IR_INTRINSICINST_H
 #define LLVM_IR_INTRINSICINST_H
 
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -591,6 +592,31 @@ class VPCmpIntrinsic : public VPIntrinsic {
   /// @}
 };
 
+/// This is the common base class for floating point builtin intrinsics.
+class FPBuiltinIntrinsic : public IntrinsicInst {
+public:
+  static const std::string FPBUILTIN_PREFIX;
+  static const std::string FP_MAX_ERROR;
+
+  std::optional<float> getRequiredAccuracy() const;
+
+  Type::TypeID getBaseTypeID() const;
+  ElementCount getElementCount() const;
+
+  /// Check the callsite attributes for this FPBuiltinIntrinsic against a list
+  /// of FP attributes that the caller knows how to process to see if the
+  /// current intrinsic has unrecognized attributes
+  bool hasUnrecognizedFPAttrs(const StringSet<> HandledAttrs);
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  /// @{
+  static bool classof(const IntrinsicInst *I);    
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+  /// @}
+};
+
 /// This is the common base class for constrained floating point intrinsics.
 class ConstrainedFPIntrinsic : public IntrinsicInst {
 public: