llvm
diff --git a/‎llvm/include/llvm/Analysis/TargetTransformInfo.h‎
Lines changed: 7 additions & 0 deletions b/‎llvm/include/llvm/Analysis/TargetTransformInfo.h‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎llvm/include/llvm/CodeGen/ISDOpcodes.h‎
Lines changed: 3 additions & 0 deletions b/‎llvm/include/llvm/CodeGen/ISDOpcodes.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/include/llvm/CodeGen/SelectionDAG.h‎
Lines changed: 4 additions & 0 deletions b/‎llvm/include/llvm/CodeGen/SelectionDAG.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/include/llvm/CodeGen/SelectionDAGNodes.h‎
Lines changed: 68 additions & 0 deletions b/‎llvm/include/llvm/CodeGen/SelectionDAGNodes.h‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/IRBuilder.h‎
Lines changed: 11 additions & 0 deletions b/‎llvm/include/llvm/IR/IRBuilder.h‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/Intrinsics.h‎
Lines changed: 7 additions & 4 deletions b/‎llvm/include/llvm/IR/Intrinsics.h‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎llvm/include/llvm/IR/Intrinsics.td‎
Lines changed: 15 additions & 0 deletions b/‎llvm/include/llvm/IR/Intrinsics.td‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Target/TargetSelectionDAG.td‎
Lines changed: 13 additions & 0 deletions b/‎llvm/include/llvm/Target/TargetSelectionDAG.td‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎llvm/lib/Analysis/TargetTransformInfo.cpp‎
Lines changed: 11 additions & 0 deletions b/‎llvm/lib/Analysis/TargetTransformInfo.cpp‎
Lines changed: 11 additions & 0 deletions
@@ -270,6 +270,13 @@ class TargetTransformInfo {
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale) const;
 
+  /// \brief Return true if the target works with masked instruction
+  /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
+  /// AVX-512 architecture will also allow masks for non-consecutive memory
+  /// accesses.
+  virtual bool isLegalPredicatedStore(Type *DataType, int Consecutive) const;
+  virtual bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const;
+
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
   /// of the specified type.
 
@@ -675,6 +675,9 @@ namespace ISD {
     ATOMIC_LOAD_UMIN,
     ATOMIC_LOAD_UMAX,
 
+    // Masked load and store
+    MLOAD, MSTORE,
+
     /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
     /// is the chain and the second operand is the alloca pointer.
     LIFETIME_START, LIFETIME_END,
 
@@ -866,6 +866,10 @@ class SelectionDAG {
   SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,
                            SDValue Offset, ISD::MemIndexedMode AM);
 
+  SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
+                        SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
+  SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+                         SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
   /// getSrcValue - Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);
 
 
@@ -1177,6 +1177,8 @@ class MemSDNode : public SDNode {
            N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
            N->getOpcode() == ISD::ATOMIC_LOAD         ||
            N->getOpcode() == ISD::ATOMIC_STORE        ||
+           N->getOpcode() == ISD::MLOAD               ||
+           N->getOpcode() == ISD::MSTORE              ||
            N->isMemIntrinsic()                        ||
            N->isTargetMemoryOpcode();
   }
@@ -1926,6 +1928,72 @@ class StoreSDNode : public LSBaseSDNode {
   }
 };
 
+/// MaskedLoadStoreSDNode - This is a base class is used to represent MLOAD and
+/// MSTORE nodes
+///
+class MaskedLoadStoreSDNode : public MemSDNode {
+  // Operands
+  SDUse Ops[4];
+public:
+  friend class SelectionDAG;
+  MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+    InitOperands(Ops, Operands, numOperands);
+  }
+
+  // In the both nodes address is Op1, mask is Op2:
+  // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
+  // MaskedStoreSDNode (Chain, ptr, mask, data)
+  // Mask is a vector of i1 elements
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getMask() const    { return getOperand(2); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MLOAD ||
+           N->getOpcode() == ISD::MSTORE;
+  }
+};
+
+/// MaskedLoadSDNode - This class is used to represent an MLOAD node
+///
+class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
+public:
+  friend class SelectionDAG;
+  MaskedLoadSDNode(unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
+                            VTs, MemVT, MMO) 
+  {}
+
+  const SDValue &getSrc0() const { return getOperand(3); }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MLOAD;
+  }
+};
+
+/// MaskedStoreSDNode - This class is used to represent an MSTORE node
+///
+class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
+
+public:
+  friend class SelectionDAG;
+  MaskedStoreSDNode(unsigned Order, DebugLoc dl,
+                   SDValue *Operands, unsigned numOperands, 
+                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+    : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
+                            VTs, MemVT, MMO) 
+  {}
+
+  const SDValue &getData() const { return getOperand(3); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MSTORE;
+  }
+};
+
 /// MachineSDNode - An SDNode that represents everything that will be needed
 /// to construct a MachineInstr. These nodes are created during the
 /// instruction selection proper phase.
 
@@ -429,11 +429,22 @@ class IRBuilderBase {
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
 
+  /// \brief Create a call to Masked Load intrinsic
+  CallInst *CreateMaskedLoad(ArrayRef<Value *> Ops);
+
+  /// \brief Create a call to Masked Store intrinsic
+  CallInst *CreateMaskedStore(ArrayRef<Value *> Ops);
+
   /// \brief Create an assume intrinsic call that allows the optimizer to
   /// assume that the provided condition will be true.
   CallInst *CreateAssumption(Value *Cond);
 
 private:
+  /// \brief Create a call to a masked intrinsic with given Id.
+  /// Masked intrinsic has only one overloaded type - data type.
+  CallInst *CreateMaskedIntrinsic(unsigned Id, ArrayRef<Value *> Ops,
+                                  Type *DataTy);
+
   Value *getCastedInt8PtrValue(Value *Ptr);
 };
 
 
@@ -76,7 +76,8 @@ namespace Intrinsic {
     enum IITDescriptorKind {
       Void, VarArg, MMX, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
-      Argument, ExtendArgument, TruncArgument, HalfVecArgument
+      Argument, ExtendArgument, TruncArgument, HalfVecArgument,
+      SameVecWidthArgument
     } Kind;
 
     union {
@@ -96,13 +97,15 @@ namespace Intrinsic {
     };
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
-             Kind == TruncArgument || Kind == HalfVecArgument);
+             Kind == TruncArgument || Kind == HalfVecArgument ||
+             Kind == SameVecWidthArgument);
       return Argument_Info >> 2;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
-             Kind == TruncArgument || Kind == HalfVecArgument);
-      return (ArgKind)(Argument_Info&3);
+             Kind == TruncArgument || Kind == HalfVecArgument ||
+             Kind == SameVecWidthArgument);
+      return (ArgKind)(Argument_Info & 3);
     }
 
     static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
 
@@ -112,6 +112,10 @@ class LLVMMatchType<int num>
 // the intrinsic is overloaded, so the matched type should be declared as iAny.
 class LLVMExtendedType<int num> : LLVMMatchType<num>;
 class LLVMTruncatedType<int num> : LLVMMatchType<num>;
+class LLVMVectorSameWidth<int num, LLVMType elty>
+  : LLVMMatchType<num> {
+  ValueType ElTy = elty.VT;
+}
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -555,6 +559,17 @@ def int_convertuu  : Intrinsic<[llvm_anyint_ty],
 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                 [], "llvm.clear_cache">;
 
+//===-------------------------- Masked Intrinsics -------------------------===//
+//
+def int_masked_store : Intrinsic<[], [llvm_ptr_ty, llvm_anyvector_ty,
+                                      llvm_i32_ty,
+                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
+                                 [IntrReadWriteArgMem]>;
+
+def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
+                                 [llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty,
+                                  LLVMVectorSameWidth<0, llvm_i1_ty>],
+                                 [IntrReadArgMem]>;
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
 
@@ -188,6 +188,14 @@ def SDTIStore : SDTypeProfile<1, 3, [       // indexed store
   SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
 ]>;
 
+def SDTMaskedStore: SDTypeProfile<0, 3, [       // masked store
+  SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
+
+def SDTMaskedLoad: SDTypeProfile<1, 3, [       // masked load
+  SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>
+]>;
+
 def SDTVecShuffle : SDTypeProfile<1, 2, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
 ]>;
@@ -454,6 +462,11 @@ def atomic_load      : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
 def atomic_store     : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
+def masked_store : SDNode<"ISD::MSTORE",  SDTMaskedStore,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def masked_load  : SDNode<"ISD::MLOAD",  SDTMaskedLoad,
+                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
 // and truncst (see below).
 def ld         : SDNode<"ISD::LOAD"       , SDTLoad,
 
@@ -101,6 +101,17 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
   return PrevTTI->isLegalICmpImmediate(Imm);
 }
 
+bool TargetTransformInfo::isLegalPredicatedLoad(Type *DataType,
+                                                int Consecutive) const {
+  return false;
+}
+
+bool TargetTransformInfo::isLegalPredicatedStore(Type *DataType,
+                                                 int Consecutive) const {
+  return false;
+}
+
+
 bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg,