Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ struct DisableLIRP {

/// When true, Wcslen is disabled.
static bool Wcslen;

/// When true, HashRecognize is disabled.
static bool HashRecognize;
};

/// Performs Loop Idiom Recognize Pass.
Expand Down
176 changes: 175 additions & 1 deletion llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/HashRecognize.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
Expand Down Expand Up @@ -143,6 +144,14 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Wcslen), cl::init(false),
cl::ReallyHidden);

bool DisableLIRP::HashRecognize;
static cl::opt<bool, true>
DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize",
cl::desc("Proceed with loop idiom recognize pass, "
"but do not optimize CRC loops."),
cl::location(DisableLIRP::HashRecognize),
cl::init(false), cl::ReallyHidden);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, everything seems to be in order: will merge. Quick note for reviewers in case regressions are found: prefer flipping this cl::init to true while the regression is being investigated, over reverting the patch.


static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling "
Expand Down Expand Up @@ -242,6 +251,7 @@ class LoopIdiomRecognize {
const SCEV *BECount);
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);
bool optimizeCRCLoop(const PolynomialInfo &Info);

/// @}
/// \name Noncountable Loop Idiom Handling
Expand Down Expand Up @@ -287,6 +297,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

std::optional<PolynomialInfo> HR;

LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
AR.MSSA, DL, ORE);
if (!LIR.runOnLoop(&L))
Expand Down Expand Up @@ -335,7 +347,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);

if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy)
if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic ||
HasMemcpy || !DisableLIRP::HashRecognize)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();

Expand Down Expand Up @@ -378,6 +391,13 @@ bool LoopIdiomRecognize::runOnCountableLoop() {

MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}

// Optimize a CRC loop if HashRecognize found one, provided we're not
// optimizing for size.
if (!DisableLIRP::HashRecognize && !ApplyCodeSizeHeuristics)
if (auto Res = HashRecognize(*CurLoop, *SE).getResult())
optimizeCRCLoop(*Res);

return MadeChange;
}

Expand Down Expand Up @@ -1514,6 +1534,160 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
return false;
}

bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) {
// FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using
// carry-less multiplication instructions, which is more efficient than our
// Sarwate table-lookup optimization. Hence, until we're able to emit
// target-specific instructions for Hexagon, subsuming HexagonLoopIdiom,
// disable the optimization for Hexagon.
Module &M = *CurLoop->getHeader()->getModule();
Triple TT(M.getTargetTriple());
if (TT.getArch() == Triple::hexagon)
return false;

// First, create a new GlobalVariable corresponding to the
// Sarwate-lookup-table.
Type *CRCTy = Info.LHS->getType();
unsigned CRCBW = CRCTy->getIntegerBitWidth();
std::array<Constant *, 256> CRCConstants;
transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped),
CRCConstants.begin(),
[CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); });
Constant *ConstArray =
ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants);
GlobalVariable *GV =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we created two identical LUTs in two translation units, does ICF work during linking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect Identical COMDAT folding to work on any two identical global variables, and I'm not sure what that has to do with the patch? Does it depend on some parameter of the GlobalVariable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by "identical" ? Constant arrays cannot be merged if the program compares array addresses. So I'd expect that's an option. Also, same-translation-unit case would be nice to test, in case someone implements CRC as a C macro.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I'm not sure how we'd write a test for this: possibly an LLD test?

new GlobalVariable(M, ConstArray->getType(), true,
GlobalValue::PrivateLinkage, ConstArray, ".crctable");

PHINode *IV = CurLoop->getCanonicalInductionVariable();
SmallVector<PHINode *, 2> Cleanup;

// Next, mark all PHIs for removal except IV.
{
for (PHINode &PN : CurLoop->getHeader()->phis()) {
if (&PN == IV)
continue;
PN.replaceAllUsesWith(PoisonValue::get(PN.getType()));
Cleanup.push_back(&PN);
}
}

// Next, fix up the trip count.
{
unsigned NewBTC = (Info.TripCount / 8) - 1;
BasicBlock *LoopBlk = CurLoop->getLoopLatch();
BranchInst *BrInst = cast<BranchInst>(LoopBlk->getTerminator());
CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk
? ICmpInst::Predicate::ICMP_NE
: ICmpInst::Predicate::ICMP_EQ;
Instruction *ExitCond = CurLoop->getLatchCmpInst();
Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC);
IRBuilder<> Builder(ExitCond);
Value *NewExitCond =
Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond");
ExitCond->replaceAllUsesWith(NewExitCond);
deleteDeadInstruction(ExitCond);
}

// Finally, fill the loop with the Sarwate-table-lookup logic, and replace all
// uses of ComputedValue.
//
// Little-endian:
// crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)]
// Big-Endian:
// crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)]
{
auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) {
Type *OpTy = Op->getType();
unsigned OpBW = OpTy->getIntegerBitWidth();
return OpBW > 8
? Builder.CreateAnd(Op, ConstantInt::get(OpTy, 0XFF), Name)
: Op;
};
auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op,
const Twine &Name) {
Type *OpTy = Op->getType();

// When the bitwidth of the CRC mismatches the Op's bitwidth, we need to
// use the CRC's bitwidth as the reference for shifting right.
return LoByte(Builder,
CRCBW > 8 ? Builder.CreateLShr(
Op, ConstantInt::get(OpTy, CRCBW - 8), Name)
: Op,
Name + ".lo.byte");
};

IRBuilder<> Builder(CurLoop->getHeader(),
CurLoop->getHeader()->getFirstNonPHIIt());

// Create the CRC PHI, and initialize its incoming value to the initial
// value of CRC.
PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc");
CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader());

// CRC is now an evolving variable, initialized to the PHI.
Value *CRC = CRCPhi;

// TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte
// of LHSAux), if LHSAux is non-nullptr.
Value *Indexer = CRC;
if (Value *Data = Info.LHSAux) {
Type *DataTy = Data->getType();

// To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we
// shift right by that amount, and take the lo-byte (in the little-endian
// case), or shift left by that amount, and take the hi-idx (in the
// big-endian case).
Value *IVBits = Builder.CreateZExtOrTrunc(
Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer");
Value *DataIndexer =
Info.ByteOrderSwapped
? Builder.CreateShl(Data, IVBits, "data.indexer")
: Builder.CreateLShr(Data, IVBits, "data.indexer");
Indexer = Builder.CreateXor(
DataIndexer,
Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"),
"crc.data.indexer");
}

Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi")
: LoByte(Builder, Indexer, "indexer.lo");

// Always index into a GEP using the index type.
Indexer = Builder.CreateZExt(
Indexer, SE->getDataLayout().getIndexType(GV->getType()),
"indexer.ext");

// CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC].
Value *CRCTableGEP =
Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd");
Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld");

// CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of
// CRC-8.
Value *CRCNext = CRCTableLd;
if (CRCBW > 8) {
Value *CRCShift = Info.ByteOrderSwapped
? Builder.CreateShl(CRC, 8, "crc.be.shift")
: Builder.CreateLShr(CRC, 8, "crc.le.shift");
CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next");
}

// Connect the back-edge for the loop, and RAUW the ComputedValue.
CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch());
Info.ComputedValue->replaceUsesOutsideBlock(CRCNext,
CurLoop->getLoopLatch());
}

// Cleanup.
{
for (PHINode *PN : Cleanup)
RecursivelyDeleteDeadPHINode(PN);
SE->forgetLoop(CurLoop);
}
return true;
}

bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
Expand Down
Loading
Loading