Skip to content

Commit 85467c5

Browse files
committed
[LoopIdiom] Use HashRecognize to optimize CRC
1 parent 03b41f1 commit 85467c5

File tree

3 files changed

+618
-1
lines changed

3 files changed

+618
-1
lines changed

llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ struct DisableLIRP {
4040

4141
/// When true, Wcslen is disabled.
4242
static bool Wcslen;
43+
44+
/// When true, HashRecognize is disabled.
45+
static bool HashRecognize;
4346
};
4447

4548
/// Performs Loop Idiom Recognize Pass.

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "llvm/ADT/StringRef.h"
4040
#include "llvm/Analysis/AliasAnalysis.h"
4141
#include "llvm/Analysis/CmpInstAnalysis.h"
42+
#include "llvm/Analysis/HashRecognize.h"
4243
#include "llvm/Analysis/LoopInfo.h"
4344
#include "llvm/Analysis/LoopPass.h"
4445
#include "llvm/Analysis/MemoryLocation.h"
@@ -144,6 +145,14 @@ static cl::opt<bool, true>
144145
cl::location(DisableLIRP::Wcslen), cl::init(false),
145146
cl::ReallyHidden);
146147

148+
bool DisableLIRP::HashRecognize;
149+
static cl::opt<bool, true>
150+
DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize",
151+
cl::desc("Proceed with loop idiom recognize pass, "
152+
"but do not optimize CRC loops."),
153+
cl::location(DisableLIRP::HashRecognize),
154+
cl::init(false), cl::ReallyHidden);
155+
147156
static cl::opt<bool> UseLIRCodeSizeHeurs(
148157
"use-lir-code-size-heurs",
149158
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -238,6 +247,7 @@ class LoopIdiomRecognize {
238247
const SCEV *BECount);
239248
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
240249
bool IsLoopMemset = false);
250+
bool optimizeCRCLoop(const PolynomialInfo &Info);
241251

242252
/// @}
243253
/// \name Noncountable Loop Idiom Handling
@@ -283,6 +293,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
283293
// but ORE cannot be preserved (see comment before the pass definition).
284294
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
285295

296+
std::optional<PolynomialInfo> HR;
297+
286298
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
287299
AR.MSSA, DL, ORE);
288300
if (!LIR.runOnLoop(&L))
@@ -326,7 +338,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
326338
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
327339
HasMemcpy = TLI->has(LibFunc_memcpy);
328340

329-
if (HasMemset || HasMemsetPattern || HasMemcpy)
341+
if (HasMemset || HasMemsetPattern || HasMemcpy || !DisableLIRP::HashRecognize)
330342
if (SE->hasLoopInvariantBackedgeTakenCount(L))
331343
return runOnCountableLoop();
332344

@@ -369,6 +381,12 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
369381

370382
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
371383
}
384+
385+
// Optimize a CRC loop if HashRecognize found one.
386+
if (!DisableLIRP::HashRecognize)
387+
if (auto Res = HashRecognize(*CurLoop, *SE).getResult())
388+
optimizeCRCLoop(*Res);
389+
372390
return MadeChange;
373391
}
374392

@@ -1473,6 +1491,160 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
14731491
return false;
14741492
}
14751493

1494+
bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) {
1495+
// FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using
1496+
// carry-less multiplication instructions, which is more efficient than our
1497+
// Sarwate table-lookup optimization. Hence, until we're able to emit
1498+
// target-specific instructions for Hexagon, subsuming HexagonLoopIdiom,
1499+
// disable the optimization for Hexagon.
1500+
Module &M = *CurLoop->getHeader()->getModule();
1501+
Triple TT(M.getTargetTriple());
1502+
if (TT.getArch() == Triple::hexagon)
1503+
return false;
1504+
1505+
// First, create a new GlobalVariable corresponding to the
1506+
// Sarwate-lookup-table.
1507+
Type *CRCTy = Info.LHS->getType();
1508+
unsigned CRCBW = CRCTy->getIntegerBitWidth();
1509+
std::array<Constant *, 256> CRCConstants;
1510+
transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped),
1511+
CRCConstants.begin(),
1512+
[CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); });
1513+
Constant *ConstArray =
1514+
ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants);
1515+
GlobalVariable *GV =
1516+
new GlobalVariable(M, ConstArray->getType(), true,
1517+
GlobalValue::PrivateLinkage, ConstArray, ".crctable");
1518+
1519+
PHINode *IV = CurLoop->getCanonicalInductionVariable();
1520+
SmallVector<PHINode *, 2> Cleanup;
1521+
1522+
// Next, mark all PHIs for removal except IV.
1523+
{
1524+
for (PHINode &PN : CurLoop->getHeader()->phis()) {
1525+
if (&PN == IV)
1526+
continue;
1527+
PN.replaceAllUsesWith(PoisonValue::get(PN.getType()));
1528+
Cleanup.push_back(&PN);
1529+
}
1530+
}
1531+
1532+
// Next, fix up the trip count.
1533+
{
1534+
unsigned NewBTC = (Info.TripCount / 8) - 1;
1535+
BasicBlock *LoopBlk = CurLoop->getLoopLatch();
1536+
BranchInst *BrInst = cast<BranchInst>(LoopBlk->getTerminator());
1537+
CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk
1538+
? ICmpInst::Predicate::ICMP_NE
1539+
: ICmpInst::Predicate::ICMP_EQ;
1540+
Instruction *ExitCond = CurLoop->getLatchCmpInst();
1541+
Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC);
1542+
IRBuilder<> Builder(ExitCond);
1543+
Value *NewExitCond =
1544+
Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond");
1545+
ExitCond->replaceAllUsesWith(NewExitCond);
1546+
deleteDeadInstruction(ExitCond);
1547+
}
1548+
1549+
// Finally, fill the loop with the Sarwate-table-lookup logic, and replace all
1550+
// uses of ComputedValue.
1551+
//
1552+
// Little-endian:
1553+
// crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)]
1554+
// Big-Endian:
1555+
// crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)]
1556+
{
1557+
auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) {
1558+
Type *OpTy = Op->getType();
1559+
unsigned OpBW = OpTy->getIntegerBitWidth();
1560+
return OpBW > 8
1561+
? Builder.CreateAnd(Op, ConstantInt::get(OpTy, 0XFF), Name)
1562+
: Op;
1563+
};
1564+
auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op,
1565+
const Twine &Name) {
1566+
Type *OpTy = Op->getType();
1567+
1568+
// When the bitwidth of the CRC mismatches the Op's bitwidth, we need to
1569+
// use the CRC's bitwidth as the reference for shifting right.
1570+
return LoByte(Builder,
1571+
CRCBW > 8 ? Builder.CreateLShr(
1572+
Op, ConstantInt::get(OpTy, CRCBW - 8), Name)
1573+
: Op,
1574+
Name + ".lo.byte");
1575+
};
1576+
1577+
IRBuilder<> Builder(CurLoop->getHeader(),
1578+
CurLoop->getHeader()->getFirstNonPHIIt());
1579+
1580+
// Create the CRC PHI, and initialize its incoming value to the initial
1581+
// value of CRC.
1582+
PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc");
1583+
CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader());
1584+
1585+
// CRC is now an evolving variable, initialized to the PHI.
1586+
Value *CRC = CRCPhi;
1587+
1588+
// TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte
1589+
// of LHSAux), if LHSAux is non-nullptr.
1590+
Value *Indexer = CRC;
1591+
if (Value *Data = Info.LHSAux) {
1592+
Type *DataTy = Data->getType();
1593+
1594+
// To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we
1595+
// shift right by that amount, and take the lo-byte (in the little-endian
1596+
// case), or shift left by that amount, and take the hi-idx (in the
1597+
// big-endian case).
1598+
Value *IVBits = Builder.CreateZExtOrTrunc(
1599+
Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer");
1600+
Value *DataIndexer =
1601+
Info.ByteOrderSwapped
1602+
? Builder.CreateShl(Data, IVBits, "data.indexer")
1603+
: Builder.CreateLShr(Data, IVBits, "data.indexer");
1604+
Indexer = Builder.CreateXor(
1605+
DataIndexer,
1606+
Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"),
1607+
"crc.data.indexer");
1608+
}
1609+
1610+
Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi")
1611+
: LoByte(Builder, Indexer, "indexer.lo");
1612+
1613+
// Always index into a GEP using the index type.
1614+
Indexer = Builder.CreateZExt(
1615+
Indexer, SE->getDataLayout().getIndexType(GV->getType()),
1616+
"indexer.ext");
1617+
1618+
// CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC].
1619+
Value *CRCTableGEP =
1620+
Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd");
1621+
Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld");
1622+
1623+
// CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of
1624+
// CRC-8.
1625+
Value *CRCNext = CRCTableLd;
1626+
if (CRCBW > 8) {
1627+
Value *CRCShift = Info.ByteOrderSwapped
1628+
? Builder.CreateShl(CRC, 8, "crc.be.shift")
1629+
: Builder.CreateLShr(CRC, 8, "crc.le.shift");
1630+
CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next");
1631+
}
1632+
1633+
// Connect the back-edge for the loop, and RAUW the ComputedValue.
1634+
CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch());
1635+
Info.ComputedValue->replaceUsesOutsideBlock(CRCNext,
1636+
CurLoop->getLoopLatch());
1637+
}
1638+
1639+
// Cleanup.
1640+
{
1641+
for (PHINode *PN : Cleanup)
1642+
RecursivelyDeleteDeadPHINode(PN);
1643+
SE->forgetLoop(CurLoop);
1644+
}
1645+
return true;
1646+
}
1647+
14761648
bool LoopIdiomRecognize::runOnNoncountableLoop() {
14771649
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
14781650
<< CurLoop->getHeader()->getParent()->getName()

0 commit comments

Comments
 (0)