Skip to content

Commit f1de34b

Browse files
author
Elena Demikhovsky
committed
Masked Load / Store Intrinsics - the CodeGen part.
I'm recommitting the codegen part of the patch. The vectorizer part will be sent for review again. Masked Vector Load and Store Intrinsics. Introduced new target-independent intrinsics in order to support masked vector loads and stores. The loop vectorizer optimizes loops containing conditional memory accesses by generating these intrinsics for existing targets AVX2 and AVX-512. The vectorizer asks the target about availability of masked vector loads and stores. Added SDNodes for masked operations and lowering patterns for X86 code generator. Examples: declare <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32> %passthru, i32 4 /* align */, <16 x i1> %mask) declare void @llvm.masked.store.v8f64(i8* %addr, <8 x double> %value, i32 4, <8 x i1> %mask) Scalarizer for other targets (not AVX2/AVX-512) will be done in a separate patch. http://reviews.llvm.org/D6191 llvm-svn: 223348
1 parent 8b24b32 commit f1de34b

27 files changed

+873
-12
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,13 @@ class TargetTransformInfo {
270270
int64_t BaseOffset, bool HasBaseReg,
271271
int64_t Scale) const;
272272

273+
/// \brief Return true if the target supports masked load/store instructions.
274+
/// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
275+
/// AVX-512 architecture will also allow masks for non-consecutive memory
276+
/// accesses.
277+
virtual bool isLegalPredicatedStore(Type *DataType, int Consecutive) const;
278+
virtual bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const;
279+
273280
/// \brief Return the cost of the scaling factor used in the addressing
274281
/// mode represented by AM for this target, for a load/store
275282
/// of the specified type.

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,9 @@ namespace ISD {
675675
ATOMIC_LOAD_UMIN,
676676
ATOMIC_LOAD_UMAX,
677677

678+
// Masked load and store
679+
MLOAD, MSTORE,
680+
678681
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
679682
/// is the chain and the second operand is the alloca pointer.
680683
LIFETIME_START, LIFETIME_END,

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,10 @@ class SelectionDAG {
866866
SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,
867867
SDValue Offset, ISD::MemIndexedMode AM);
868868

869+
SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
870+
SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
871+
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
872+
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
869873
/// getSrcValue - Construct a node to track a Value* through the backend.
870874
SDValue getSrcValue(const Value *v);
871875

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,8 @@ class MemSDNode : public SDNode {
11771177
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
11781178
N->getOpcode() == ISD::ATOMIC_LOAD ||
11791179
N->getOpcode() == ISD::ATOMIC_STORE ||
1180+
N->getOpcode() == ISD::MLOAD ||
1181+
N->getOpcode() == ISD::MSTORE ||
11801182
N->isMemIntrinsic() ||
11811183
N->isTargetMemoryOpcode();
11821184
}
@@ -1926,6 +1928,72 @@ class StoreSDNode : public LSBaseSDNode {
19261928
}
19271929
};
19281930

1931+
/// MaskedLoadStoreSDNode - This base class is used to represent MLOAD and
1932+
/// MSTORE nodes.
1933+
///
1934+
class MaskedLoadStoreSDNode : public MemSDNode {
1935+
// Operand storage: four slots, matching the four-operand layouts
// (Chain, ptr, mask, src0/data) listed further down.
1936+
SDUse Ops[4];
1937+
public:
1938+
friend class SelectionDAG;
1939+
// Forwards NodeTy, Order, dl, VTs, MemVT and MMO to the MemSDNode base, then
// hands Ops and the caller-supplied operand array to InitOperands.
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
1940+
SDValue *Operands, unsigned numOperands,
1941+
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1942+
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
1943+
InitOperands(Ops, Operands, numOperands);
1944+
}
1945+
1946+
// In both nodes the address is operand 1 and the mask is operand 2:
1947+
// MaskedLoadSDNode (Chain, ptr, mask, src0), where src0 is a passthru value
1948+
// MaskedStoreSDNode (Chain, ptr, mask, data)
1949+
// The mask is a vector of i1 elements.
1950+
const SDValue &getBasePtr() const { return getOperand(1); }
1951+
const SDValue &getMask() const { return getOperand(2); }
1952+
1953+
// Accepts nodes whose opcode is ISD::MLOAD or ISD::MSTORE.
static bool classof(const SDNode *N) {
1954+
return N->getOpcode() == ISD::MLOAD ||
1955+
N->getOpcode() == ISD::MSTORE;
1956+
}
1957+
};
1958+
1959+
/// MaskedLoadSDNode - This class is used to represent an MLOAD node.
1960+
///
1961+
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
1962+
public:
1963+
friend class SelectionDAG;
1964+
// Delegates to the MaskedLoadStoreSDNode constructor with the opcode fixed to
// ISD::MLOAD; all other arguments are passed through unchanged.
MaskedLoadSDNode(unsigned Order, DebugLoc dl,
1965+
SDValue *Operands, unsigned numOperands,
1966+
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1967+
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
1968+
VTs, MemVT, MMO)
1969+
{}
1970+
1971+
// Returns operand 3 (named src0 here).
const SDValue &getSrc0() const { return getOperand(3); }
1972+
// Accepts only nodes whose opcode is ISD::MLOAD.
static bool classof(const SDNode *N) {
1973+
return N->getOpcode() == ISD::MLOAD;
1974+
}
1975+
};
1976+
1977+
/// MaskedStoreSDNode - This class is used to represent an MSTORE node.
1978+
///
1979+
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
1980+
1981+
public:
1982+
friend class SelectionDAG;
1983+
// Delegates to the MaskedLoadStoreSDNode constructor with the opcode fixed to
// ISD::MSTORE; all other arguments are passed through unchanged.
MaskedStoreSDNode(unsigned Order, DebugLoc dl,
1984+
SDValue *Operands, unsigned numOperands,
1985+
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1986+
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
1987+
VTs, MemVT, MMO)
1988+
{}
1989+
1990+
// Returns operand 3 (named data here).
const SDValue &getData() const { return getOperand(3); }
1991+
1992+
// Accepts only nodes whose opcode is ISD::MSTORE.
static bool classof(const SDNode *N) {
1993+
return N->getOpcode() == ISD::MSTORE;
1994+
}
1995+
};
1996+
19291997
/// MachineSDNode - An SDNode that represents everything that will be needed
19301998
/// to construct a MachineInstr. These nodes are created during the
19311999
/// instruction selection proper phase.

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,22 @@ class IRBuilderBase {
429429
/// If the pointer isn't i8* it will be converted.
430430
CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
431431

432+
/// \brief Create a call to Masked Load intrinsic
433+
CallInst *CreateMaskedLoad(ArrayRef<Value *> Ops);
434+
435+
/// \brief Create a call to Masked Store intrinsic
436+
CallInst *CreateMaskedStore(ArrayRef<Value *> Ops);
437+
432438
/// \brief Create an assume intrinsic call that allows the optimizer to
433439
/// assume that the provided condition will be true.
434440
CallInst *CreateAssumption(Value *Cond);
435441

436442
private:
443+
/// \brief Create a call to a masked intrinsic with given Id.
444+
/// Masked intrinsic has only one overloaded type - data type.
445+
CallInst *CreateMaskedIntrinsic(unsigned Id, ArrayRef<Value *> Ops,
446+
Type *DataTy);
447+
437448
Value *getCastedInt8PtrValue(Value *Ptr);
438449
};
439450

llvm/include/llvm/IR/Intrinsics.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ namespace Intrinsic {
7676
enum IITDescriptorKind {
7777
Void, VarArg, MMX, Metadata, Half, Float, Double,
7878
Integer, Vector, Pointer, Struct,
79-
Argument, ExtendArgument, TruncArgument, HalfVecArgument
79+
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
80+
SameVecWidthArgument
8081
} Kind;
8182

8283
union {
@@ -96,13 +97,15 @@ namespace Intrinsic {
9697
};
9798
unsigned getArgumentNumber() const {
9899
assert(Kind == Argument || Kind == ExtendArgument ||
99-
Kind == TruncArgument || Kind == HalfVecArgument);
100+
Kind == TruncArgument || Kind == HalfVecArgument ||
101+
Kind == SameVecWidthArgument);
100102
return Argument_Info >> 2;
101103
}
102104
ArgKind getArgumentKind() const {
103105
assert(Kind == Argument || Kind == ExtendArgument ||
104-
Kind == TruncArgument || Kind == HalfVecArgument);
105-
return (ArgKind)(Argument_Info&3);
106+
Kind == TruncArgument || Kind == HalfVecArgument ||
107+
Kind == SameVecWidthArgument);
108+
return (ArgKind)(Argument_Info & 3);
106109
}
107110

108111
static IITDescriptor get(IITDescriptorKind K, unsigned Field) {

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ class LLVMMatchType<int num>
112112
// the intrinsic is overloaded, so the matched type should be declared as iAny.
113113
class LLVMExtendedType<int num> : LLVMMatchType<num>;
114114
class LLVMTruncatedType<int num> : LLVMMatchType<num>;
115+
class LLVMVectorSameWidth<int num, LLVMType elty>
116+
: LLVMMatchType<num> {
117+
ValueType ElTy = elty.VT;
118+
}
115119

116120
// Match the type of another intrinsic parameter that is expected to be a
117121
// vector type, but change the element count to be half as many
@@ -555,6 +559,17 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
555559
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
556560
[], "llvm.clear_cache">;
557561

562+
//===-------------------------- Masked Intrinsics -------------------------===//
563+
//
564+
// Masked store intrinsic: produces no result. Parameters, in order: a pointer,
// an overloaded vector value, an i32 (presumably the alignment, per the
// examples in the commit message -- confirm), and a vector of i1 tied to
// overloaded type 0 through LLVMVectorSameWidth.
def int_masked_store : Intrinsic<[], [llvm_ptr_ty, llvm_anyvector_ty,
565+
llvm_i32_ty,
566+
LLVMVectorSameWidth<0, llvm_i1_ty>],
567+
[IntrReadWriteArgMem]>;
568+
569+
// Masked load intrinsic: returns an overloaded vector. Parameters, in order: a
// pointer, a vector of the same type as the result (called the passthru in
// the commit message's example), an i32 (the alignment, per that example),
// and a vector of i1 tied to overloaded type 0 through LLVMVectorSameWidth.
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
570+
[llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty,
571+
LLVMVectorSameWidth<0, llvm_i1_ty>],
572+
[IntrReadArgMem]>;
558573
//===----------------------------------------------------------------------===//
559574
// Target-specific intrinsics
560575
//===----------------------------------------------------------------------===//

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,14 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
188188
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
189189
]>;
190190

191+
def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
192+
SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>
193+
]>;
194+
195+
def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
196+
SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>
197+
]>;
198+
191199
def SDTVecShuffle : SDTypeProfile<1, 2, [
192200
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
193201
]>;
@@ -454,6 +462,11 @@ def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
454462
def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
455463
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
456464

465+
def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
466+
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
467+
def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
468+
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
469+
457470
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
458471
// and truncst (see below).
459472
def ld : SDNode<"ISD::LOAD" , SDTLoad,

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
101101
return PrevTTI->isLegalICmpImmediate(Imm);
102102
}
103103

104+
// This implementation unconditionally reports that predicated (masked) loads
// are not legal; neither DataType nor Consecutive is consulted.
bool TargetTransformInfo::isLegalPredicatedLoad(Type *DataType,
105+
int Consecutive) const {
106+
return false;
107+
}
108+
109+
// This implementation unconditionally reports that predicated (masked) stores
// are not legal; neither DataType nor Consecutive is consulted.
bool TargetTransformInfo::isLegalPredicatedStore(Type *DataType,
110+
int Consecutive) const {
111+
return false;
112+
}
113+
114+
104115
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
105116
int64_t BaseOffset,
106117
bool HasBaseReg,

0 commit comments

Comments
 (0)