diff --git a/llvm/include/llvm/Analysis/HashRecognize.h b/llvm/include/llvm/Analysis/HashRecognize.h
new file mode 100644
index 0000000000000..872404de5bf41
--- /dev/null
+++ b/llvm/include/llvm/Analysis/HashRecognize.h
@@ -0,0 +1,113 @@
+//===- HashRecognize.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the HashRecognize analysis, which identifies hash functions
+// that can be optimized using a lookup-table or with target-specific
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_HASHRECOGNIZE_H
+#define LLVM_ANALYSIS_HASHRECOGNIZE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include <array>
+#include <tuple>
+#include <variant>
+
+namespace llvm {
+/// A tuple of bits that are expected to be zero, number N of them expected to
+/// be zero, with a boolean indicating whether it's the top or bottom N bits
+/// expected to be zero.
+using ErrBits = std::tuple<KnownBits, unsigned, bool>;
+
+/// A custom std::array with 256 entries, that also has a print function.
+struct CRCTable : public std::array<APInt, 256> {
+  void print(raw_ostream &OS) const;
+};
+
+/// The structure that is returned when a polynomial algorithm was recognized by
+/// the analysis. Currently, only the CRC algorithm is recognized.
+struct PolynomialInfo {
+  // The small constant trip-count of the analyzed loop.
+  unsigned TripCount;
+
+  // The LHS in a polynomial operation, or the initial variable of the
+  // computation, since all polynomial operations must have a constant RHS,
+  // which is the generating polynomial. It is the LHS of the polynomial
+  // division in the case of CRC. Since polynomial division is an XOR in
+  // GF(2^m), this variable must be XOR'ed with RHS in a loop to yield the
+  // ComputedValue.
+  const Value *LHS;
+
+  // The generating polynomial, or the RHS of the polynomial division in the
+  // case of CRC.
+  APInt RHS;
+
+  // The final computed value. This is a remainder of a polynomial division in
+  // the case of CRC, which must be zero.
+  const Value *ComputedValue;
+
+  // Set to true in the case of big-endian.
+  bool ByteOrderSwapped;
+
+  // An optional auxiliary checksum that augments the LHS. In the case of CRC,
+  // it is XOR'ed with the LHS, so that the computation's final remainder is
+  // zero.
+  const Value *LHSAux;
+
+  PolynomialInfo(unsigned TripCount, const Value *LHS, const APInt &RHS,
+                 const Value *ComputedValue, bool ByteOrderSwapped,
+                 const Value *LHSAux = nullptr);
+};
+
+/// The analysis.
+class HashRecognize {
+  const Loop &L;
+  ScalarEvolution &SE;
+
+public:
+  HashRecognize(const Loop &L, ScalarEvolution &SE);
+
+  // The main analysis entry point.
+  std::variant<PolynomialInfo, ErrBits, StringRef> recognizeCRC() const;
+
+  // Auxiliary entry point after analysis to interleave the generating
+  // polynomial and return a 256-entry CRC table.
+  CRCTable genSarwateTable(const APInt &GenPoly, bool ByteOrderSwapped) const;
+
+  void print(raw_ostream &OS) const;
+};
+
+class HashRecognizePrinterPass
+    : public PassInfoMixin<HashRecognizePrinterPass> {
+  raw_ostream &OS;
+
+public:
+  explicit HashRecognizePrinterPass(raw_ostream &OS) : OS(OS) {}
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &);
+};
+
+class HashRecognizeAnalysis : public AnalysisInfoMixin<HashRecognizeAnalysis> {
+  friend AnalysisInfoMixin<HashRecognizeAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  using Result = HashRecognize;
+  Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 8fe03d46946a4..cfde787b17790 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -78,6 +78,7 @@ add_llvm_component_library(LLVMAnalysis
   FunctionPropertiesAnalysis.cpp
   GlobalsModRef.cpp
   GuardUtils.cpp
+  HashRecognize.cpp
   HeatUtils.cpp
   IR2Vec.cpp
   IRSimilarityIdentifier.cpp
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
new file mode 100644
index 0000000000000..c6e9f2b64f876
--- /dev/null
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -0,0 +1,690 @@
+//===- HashRecognize.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The HashRecognize analysis recognizes unoptimized polynomial hash functions
+// with operations over a Galois field of characteristic 2, also called binary
+// fields, or GF(2^n): this class of hash functions can be optimized using a
+// lookup-table-driven implementation, or with target-specific instructions.
+// Examples:
+//
+// 1.
Cyclic redundancy check (CRC), which is a polynomial division in GF(2).
+// 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a
+//    rolling hash polynomial division in GF(2).
+// 3. Rijndael MixColumns, a step in AES computation, which is a polynomial
+//    multiplication in GF(2^8).
+// 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM),
+//    which is a polynomial evaluation in GF(2^128).
+//
+// All of them use an irreducible generating polynomial of degree m,
+//
+//    c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0
+//
+// where each coefficient c can take values in GF(2^n), where 2^n is termed
+// the order of the Galois field. For GF(2), each coefficient can take values
+// either 0 or 1, and the polynomial is simply represented by m+1 bits,
+// corresponding to the coefficients. The different variants of CRC are named by
+// degree of generating polynomial used: so CRC-32 would use a polynomial of
+// degree 32.
+//
+// The reason algorithms on GF(2^n) can be optimized with a lookup-table is the
+// following: in such fields, polynomial addition and subtraction are identical
+// and equivalent to XOR, polynomial multiplication is an AND, and polynomial
+// division is identity: the XOR and AND operations in unoptimized
+// implementations are performed bit-wise, and can be optimized to be performed
+// chunk-wise, by interleaving copies of the generating polynomial, and storing
+// the pre-computed values in a table.
+//
+// A generating polynomial of m bits always has the MSB set, so we usually
+// omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial:
+//
+//    (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021
+//
+// Transmissions are either in big-endian or little-endian form, and hash
+// algorithms are written according to this. For example, IEEE 802 and RS-232
+// specify little-endian transmission.
+//
+//===----------------------------------------------------------------------===//
+//
+// At the moment, we only recognize the CRC algorithm.
+// Documentation on CRC32 from the kernel:
+// https://www.kernel.org/doc/Documentation/crc32.txt
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/HashRecognize.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+using namespace SCEVPatternMatch;
+
+#define DEBUG_TYPE "hash-recognize"
+
+// KnownBits for a PHI node. There are at most two PHI nodes, corresponding to
+// the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not
+// relevant.
+using KnownPhiMap = SmallDenseMap<const PHINode *, KnownBits, 2>;
+
+// A pair of a PHI node along with its incoming value from within a loop.
+using PhiStepPair = std::pair<const PHINode *, const Instruction *>;
+
+/// A much simpler version of ValueTracking, in that it computes KnownBits of
+/// values, except that it computes the evolution of KnownBits in a loop with a
+/// given trip count, and predication is specialized for a significant-bit
+/// check.
+class ValueEvolution {
+  const unsigned TripCount;
+  const bool ByteOrderSwapped;
+  APInt GenPoly;
+  StringRef ErrStr;
+
+  // Compute the KnownBits of a BinaryOperator.
+  KnownBits computeBinOp(const BinaryOperator *I);
+
+  // Compute the KnownBits of an Instruction.
+  KnownBits computeInstr(const Instruction *I);
+
+  // Compute the KnownBits of a Value.
+  KnownBits compute(const Value *V);
+
+public:
+  // ValueEvolution is meant to be constructed with the TripCount of the loop,
+  // and whether the polynomial algorithm is big-endian, for the significant-bit
+  // check.
+  ValueEvolution(unsigned TripCount, bool ByteOrderSwapped);
+
+  // Given a list of PHI nodes along with their incoming value from within the
+  // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on
+  // the final iteration. Returns true on success and false on error.
+  bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions);
+
+  // In case ValueEvolution encounters an error, this is meant to be used for a
+  // precise error message.
+  StringRef getError() const { return ErrStr; }
+
+  // The computed KnownBits for each PHI node, which is populated after
+  // computeEvolutions is called.
+  KnownPhiMap KnownPhis;
+};
+
+ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
+    : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
+
+KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) {
+  KnownBits KnownL(compute(I->getOperand(0)));
+  KnownBits KnownR(compute(I->getOperand(1)));
+
+  switch (I->getOpcode()) {
+  case Instruction::BinaryOps::And:
+    return KnownL & KnownR;
+  case Instruction::BinaryOps::Or:
+    return KnownL | KnownR;
+  case Instruction::BinaryOps::Xor:
+    return KnownL ^ KnownR;
+  case Instruction::BinaryOps::Shl: {
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return KnownBits::shl(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+                          OBO->hasNoSignedWrap());
+  }
+  case Instruction::BinaryOps::LShr:
+    return KnownBits::lshr(KnownL, KnownR);
+  case Instruction::BinaryOps::AShr:
+    return KnownBits::ashr(KnownL, KnownR);
+  case Instruction::BinaryOps::Add: {
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return KnownBits::add(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+                          OBO->hasNoSignedWrap());
+  }
+  case Instruction::BinaryOps::Sub: {
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return KnownBits::sub(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+                          OBO->hasNoSignedWrap());
+  }
+  case Instruction::BinaryOps::Mul: {
+    Value *Op0 = I->getOperand(0);
+    Value *Op1 = I->getOperand(1);
+    bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(Op0);
+    return KnownBits::mul(KnownL, KnownR, SelfMultiply);
+  }
+  case Instruction::BinaryOps::UDiv:
+    return KnownBits::udiv(KnownL, KnownR);
+  case Instruction::BinaryOps::SDiv:
+    return KnownBits::sdiv(KnownL, KnownR);
+  case Instruction::BinaryOps::URem:
+    return KnownBits::urem(KnownL, KnownR);
+  case Instruction::BinaryOps::SRem:
+    return KnownBits::srem(KnownL, KnownR);
+  default:
+    ErrStr = "Unknown BinaryOperator";
+    unsigned BitWidth = I->getType()->getScalarSizeInBits();
+    return {BitWidth};
+  }
+}
+
+KnownBits ValueEvolution::computeInstr(const Instruction *I) {
+  unsigned BitWidth = I->getType()->getScalarSizeInBits();
+
+  // We look up in the map that contains the KnownBits of the PHI from the
+  // previous iteration.
+  if (const PHINode *P = dyn_cast<PHINode>(I))
+    return KnownPhis.lookup_or(P, BitWidth);
+
+  // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
+  // that is predicated on the (least|most)-significant-bit check.
+  CmpPredicate Pred;
+  Value *L, *R, *TV, *FV;
+  if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Value(TV),
+                        m_Value(FV)))) {
+    // We need to check LCR against [0, 2) in the little-endian case, because
+    // the RCR check is insufficient: it is simply [0, 1).
+    if (!ByteOrderSwapped) {
+      KnownBits KnownL = compute(L);
+      unsigned ICmpBW = KnownL.getBitWidth();
+      auto LCR = ConstantRange::fromKnownBits(KnownL, false);
+      auto CheckLCR = ConstantRange(APInt::getZero(ICmpBW), APInt(ICmpBW, 2));
+      if (LCR != CheckLCR) {
+        ErrStr = "Bad LHS of significant-bit-check";
+        return {BitWidth};
+      }
+    }
+
+    // Check that the predication is on (most|least) significant bit.
+    KnownBits KnownR = compute(R);
+    unsigned ICmpBW = KnownR.getBitWidth();
+    auto RCR = ConstantRange::fromKnownBits(KnownR, false);
+    auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, RCR);
+    ConstantRange CheckRCR(APInt::getZero(ICmpBW),
+                           ByteOrderSwapped ? APInt::getSignedMinValue(ICmpBW)
+                                            : APInt(ICmpBW, 1));
+    if (AllowedR == CheckRCR)
+      return compute(TV);
+    if (AllowedR.inverse() == CheckRCR)
+      return compute(FV);
+
+    ErrStr = "Bad RHS of significant-bit-check";
+    return {BitWidth};
+  }
+
+  if (auto *BO = dyn_cast<BinaryOperator>(I))
+    return computeBinOp(BO);
+
+  switch (I->getOpcode()) {
+  case Instruction::CastOps::Trunc:
+    return compute(I->getOperand(0)).trunc(BitWidth);
+  case Instruction::CastOps::ZExt:
+    return compute(I->getOperand(0)).zext(BitWidth);
+  case Instruction::CastOps::SExt:
+    return compute(I->getOperand(0)).sext(BitWidth);
+  default:
+    ErrStr = "Unknown Instruction";
+    return {BitWidth};
+  }
+}
+
+KnownBits ValueEvolution::compute(const Value *V) {
+  if (auto *CI = dyn_cast<ConstantInt>(V))
+    return KnownBits::makeConstant(CI->getValue());
+
+  if (auto *I = dyn_cast<Instruction>(V))
+    return computeInstr(I);
+
+  ErrStr = "Unknown Value";
+  unsigned BitWidth = V->getType()->getScalarSizeInBits();
+  return {BitWidth};
+}
+
+bool ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
+  for (unsigned I = 0; I < TripCount; ++I) {
+    for (auto [Phi, Step] : PhiEvolutions) {
+      KnownBits KnownAtIter = computeInstr(Step);
+      if (KnownAtIter.getBitWidth() < I + 1) {
+        ErrStr = "Loop iterations exceed bitwidth of result";
+        return false;
+      }
+      KnownPhis.emplace_or_assign(Phi, KnownAtIter);
+    }
+  }
+  return ErrStr.empty();
+}
+
+/// A structure that can hold either a Simple Recurrence or a Conditional
+/// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand
+/// of the BO, while in a Conditional Recurrence, it is a SelectInst.
+struct RecurrenceInfo {
+  const Loop &L;
+  const PHINode *Phi = nullptr;
+  BinaryOperator *BO = nullptr;
+  Value *Start = nullptr;
+  Value *Step = nullptr;
+  std::optional<APInt> ExtraConst;
+
+  explicit RecurrenceInfo(const Loop &L) : L(L) {}
+  explicit operator bool() const { return BO != nullptr; }
+
+  void print(raw_ostream &OS, unsigned Indent) const {
+    OS.indent(Indent) << "Phi: ";
+    Phi->print(OS);
+    OS << "\n";
+    OS.indent(Indent) << "BinaryOperator: ";
+    BO->print(OS);
+    OS << "\n";
+    OS.indent(Indent) << "Start: ";
+    Start->print(OS);
+    OS << "\n";
+    OS.indent(Indent) << "Step: ";
+    Step->print(OS);
+    OS << "\n";
+    if (ExtraConst) {
+      OS.indent(Indent) << "ExtraConst: ";
+      ExtraConst->print(OS, false);
+      OS << "\n";
+    }
+  }
+
+  bool matchSimpleRecurrence(const PHINode *P);
+  bool matchConditionalRecurrence(
+      const PHINode *P,
+      Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
+
+private:
+  BinaryOperator *digRecurrence(
+      Instruction *V,
+      Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
+};
+
+/// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence
+/// cycle of the form:
+///
+/// loop:
+///   %rec = phi [%start, %entry], [%BO, %loop]
+///   ...
+///   %BO = binop %rec, %step
+///
+/// or
+///
+/// loop:
+///   %rec = phi [%start, %entry], [%BO, %loop]
+///   ...
+///   %BO = binop %step, %rec
+///
+bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) {
+  Phi = P;
+  return llvm::matchSimpleRecurrence(Phi, BO, Start, Step);
+}
+
+/// Digs for a recurrence starting with \p V hitting the PHI node in a use-def
+/// chain. Used by matchConditionalRecurrence.
+BinaryOperator *
+RecurrenceInfo::digRecurrence(Instruction *V,
+                              Instruction::BinaryOps BOWithConstOpToMatch) {
+  SmallVector<Instruction *> Worklist;
+  Worklist.push_back(V);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+
+    // Don't add a PHI's operands to the Worklist.
+    if (isa<PHINode>(I))
+      continue;
+
+    // Find a recurrence over a BinOp, by matching either of its operands
+    // with the PHINode.
+    if (match(I, m_c_BinOp(m_Value(), m_Specific(Phi))))
+      return cast<BinaryOperator>(I);
+
+    // Bind to ExtraConst, if we match exactly one.
+    if (I->getOpcode() == BOWithConstOpToMatch) {
+      if (ExtraConst)
+        return nullptr;
+      const APInt *C = nullptr;
+      if (match(I, m_c_BinOp(m_APInt(C), m_Value())))
+        ExtraConst = *C;
+    }
+
+    // Continue along the use-def chain.
+    for (Use &U : I->operands())
+      if (auto *UI = dyn_cast<Instruction>(U))
+        if (L.contains(UI))
+          Worklist.push_back(UI);
+  }
+  return nullptr;
+}
+
+/// A Conditional Recurrence is a recurrence of the form:
+///
+/// loop:
+///   %rec = phi [%start, %entry], [%step, %loop]
+///   ...
+///   %step = select _, %tv, %fv
+///
+/// where %tv and %fv ultimately end up using %rec via the same %BO instruction,
+/// after digging through the use-def chain.
+///
+/// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging
+/// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched,
+/// and ExtraConst is a constant operand of that BinOp. This peculiarity exists,
+/// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the
+/// ExtraConst ends up being the generating polynomial.
+bool RecurrenceInfo::matchConditionalRecurrence(
+    const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) {
+  Phi = P;
+  if (Phi->getNumIncomingValues() != 2)
+    return false;
+
+  for (unsigned Idx = 0; Idx != 2; ++Idx) {
+    Value *FoundStep = Phi->getIncomingValue(Idx);
+    Value *FoundStart = Phi->getIncomingValue(!Idx);
+
+    Instruction *TV, *FV;
+    if (!match(FoundStep,
+               m_Select(m_Cmp(), m_Instruction(TV), m_Instruction(FV))))
+      continue;
+
+    // For a conditional recurrence, both the true and false values of the
+    // select must ultimately end up in the same recurrent BinOp.
+    BinaryOperator *FoundBO = digRecurrence(TV, BOWithConstOpToMatch);
+    BinaryOperator *AltBO = digRecurrence(FV, BOWithConstOpToMatch);
+    if (!FoundBO || FoundBO != AltBO)
+      return false;
+
+    if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) {
+      LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp "
+                           "with constant in conditional recurrence\n");
+      return false;
+    }
+
+    BO = FoundBO;
+    Start = FoundStart;
+    Step = FoundStep;
+    return true;
+  }
+  return false;
+}
+
+/// Iterates over all the phis in \p LoopLatch, and attempts to extract a
+/// Conditional Recurrence and an optional Simple Recurrence.
+static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>>
+getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) {
+  auto Phis = LoopLatch->phis();
+  unsigned NumPhis = std::distance(Phis.begin(), Phis.end());
+  if (NumPhis != 2 && NumPhis != 3)
+    return {};
+
+  RecurrenceInfo SimpleRecurrence(L);
+  RecurrenceInfo ConditionalRecurrence(L);
+  for (PHINode &P : Phis) {
+    if (&P == IndVar)
+      continue;
+    if (!SimpleRecurrence)
+      SimpleRecurrence.matchSimpleRecurrence(&P);
+    if (!ConditionalRecurrence)
+      ConditionalRecurrence.matchConditionalRecurrence(
+          &P, Instruction::BinaryOps::Xor);
+  }
+  if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence))
+    return {};
+  return std::make_pair(SimpleRecurrence, ConditionalRecurrence);
+}
+
+PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS,
+                               const APInt &RHS, const Value *ComputedValue,
+                               bool ByteOrderSwapped, const Value *LHSAux)
+    : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue),
+      ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {}
+
+/// In the big-endian case, checks the bottom N bits against CheckFn, and that
+/// the rest are unknown. In the little-endian case, checks the top N bits
+/// against CheckFn, and that the rest are unknown.
Callers usually call this
+/// function with N = TripCount, and CheckFn checking that the remainder bits of
+/// the CRC polynomial division are zero.
+static bool checkExtractBits(const KnownBits &Known, unsigned N,
+                             function_ref<bool(const KnownBits &)> CheckFn,
+                             bool ByteOrderSwapped) {
+  // N covers every bit: CheckFn decides alone, no remaining bits to inspect.
+  if (N == Known.getBitWidth())
+    return CheckFn(Known.extractBits(N, 0));
+
+  // Check the {bottom, top} N bits against CheckFn in the {big, little}-endian
+  // case, and that all the remaining bits are unknown.
+  unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
+  unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
+  return CheckFn(Known.extractBits(N, BitPos)) &&
+         Known.extractBits(Known.getBitWidth() - N, SwappedBitPos).isUnknown();
+}
+
+/// Generate a lookup table of 256 entries by interleaving the generating
+/// polynomial (the Sarwate algorithm). The big-endian table is seeded with the
+/// MSB of the CRC width and shifts left; little-endian uses the LSB and lshr.
+CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
+                                        bool ByteOrderSwapped) const {
+  unsigned BW = GenPoly.getBitWidth();
+  CRCTable Table;
+  Table[0] = APInt::getZero(BW);
+
+  if (ByteOrderSwapped) {
+    APInt CRCInit = APInt::getSignedMinValue(BW); // MSB of the CRC width.
+    for (unsigned I = 1; I < 256; I <<= 1) {
+      CRCInit = CRCInit.shl(1) ^
+                (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW));
+      for (unsigned J = 0; J < I; ++J)
+        Table[I + J] = CRCInit ^ Table[J];
+    }
+    return Table;
+  }
+
+  APInt CRCInit(BW, 1);
+  for (unsigned I = 128; I; I >>= 1) {
+    CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(BW));
+    for (unsigned J = 0; J < 256; J += (I << 1))
+      Table[I + J] = CRCInit ^ Table[J];
+  }
+  return Table;
+}
+
+/// Checks if \p Reference is reachable from \p Needle on the use-def chain, and
+/// that there are no stray PHI nodes while digging the use-def chain. \p
+/// BOToMatch is a CRC peculiarity: exactly one of the in-loop Users of Needle
+/// must match this OpCode, which is XOR for CRC.
+static bool arePHIsIntertwined(
+    const PHINode *Needle, const PHINode *Reference, const Loop &L,
+    Instruction::BinaryOps BOToMatch = Instruction::BinaryOpsEnd) {
+  // Initialize the worklist with Users of the Needle.
+  SmallVector<const Instruction *, 16> Worklist;
+  for (const User *U : Needle->users()) {
+    if (auto *UI = dyn_cast<const Instruction>(U))
+      if (L.contains(UI))
+        Worklist.push_back(UI);
+  }
+
+  // Exactly one in-loop user of Needle must be a BOToMatch (XOR for CRC).
+  if (BOToMatch != Instruction::BinaryOpsEnd) {
+    if (count_if(Worklist, [BOToMatch](const Instruction *I) {
+          return I->getOpcode() == BOToMatch;
+        }) != 1)
+      return false;
+  }
+
+  while (!Worklist.empty()) {
+    const Instruction *I = Worklist.pop_back_val();
+
+    // Since Needle is never pushed onto the Worklist, I must either be the
+    // Reference PHI node (in which case we're done), or a stray PHI node (in
+    // which case we abort).
+    if (isa<PHINode>(I))
+      return I == Reference;
+
+    for (const Use &U : I->operands())
+      if (auto *UI = dyn_cast<const Instruction>(U))
+        // Don't push Needle back onto the Worklist.
+        if (UI != Needle && L.contains(UI))
+          Worklist.push_back(UI);
+  }
+  return false;
+}
+
+// Recognizes a multiplication or division by the constant two, using SCEV. By
+// doing this, we're immune to whether the IR expression is mul/udiv or
+// equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul,
+// and std::nullopt otherwise.
+static std::optional<bool> isBigEndianBitShift(const SCEV *E) {
+  if (match(E, m_scev_UDiv(m_SCEV(), m_scev_SpecificInt(2))))
+    return false;
+  if (match(E, m_scev_Mul(m_scev_SpecificInt(2), m_SCEV())))
+    return true;
+  return {};
+}
+
+/// The main entry point for analyzing a loop and recognizing the CRC algorithm.
+/// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on
+/// failure.
+std::variant<PolynomialInfo, ErrBits, StringRef>
+HashRecognize::recognizeCRC() const {
+  if (!L.isInnermost())
+    return "Loop is not innermost";
+  unsigned TC = SE.getSmallConstantMaxTripCount(&L);
+  if (!TC || TC > 256)
+    return "Unable to find a small constant trip count";
+  BasicBlock *Latch = L.getLoopLatch();
+  BasicBlock *Exit = L.getExitBlock();
+  const PHINode *IndVar = L.getCanonicalInductionVariable();
+  if (!Latch || !Exit || !IndVar)
+    return "Loop not in canonical form";
+
+  auto R = getRecurrences(Latch, IndVar, L);
+  if (!R)
+    return "Found stray PHI";
+  auto [SimpleRecurrence, ConditionalRecurrence] = *R;
+  if (!ConditionalRecurrence)
+    return "Unable to find conditional recurrence";
+
+  // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
+  // with two, or in other words, that they're single bit-shifts.
+  std::optional<bool> ByteOrderSwapped =
+      isBigEndianBitShift(SE.getSCEV(ConditionalRecurrence.BO));
+  if (!ByteOrderSwapped)
+    return "Loop with non-unit bitshifts";
+  if (SimpleRecurrence) {
+    if (isBigEndianBitShift(SE.getSCEV(SimpleRecurrence.BO)) !=
+        ByteOrderSwapped)
+      return "Loop with non-unit bitshifts";
+    if (!arePHIsIntertwined(SimpleRecurrence.Phi, ConditionalRecurrence.Phi, L,
+                            Instruction::BinaryOps::Xor))
+      return "Simple recurrence doesn't use conditional recurrence with XOR";
+  }
+
+  // Make sure that the computed value is used in the exit block: this should be
+  // true even if it is only really used in an outer loop's exit block, since
+  // the loop is in LCSSA form.
+  auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence.Step);
+  if (none_of(ComputedValue->users(), [Exit](User *U) {
+        auto *UI = dyn_cast<Instruction>(U);
+        return UI && UI->getParent() == Exit;
+      }))
+    return "Unable to find use of computed value in loop exit block";
+
+  assert(ConditionalRecurrence.ExtraConst &&
+         "Expected ExtraConst in conditional recurrence");
+  const APInt &GenPoly = *ConditionalRecurrence.ExtraConst;
+
+  // PhiEvolutions are pairs of PHINodes along with their incoming value from
+  // within the loop, which we term as their step. Note that in the case of a
+  // Simple Recurrence, Step is an operand of the BO, while in a Conditional
+  // Recurrence, it is a SelectInst.
+  SmallVector<PhiStepPair, 2> PhiEvolutions;
+  PhiEvolutions.emplace_back(ConditionalRecurrence.Phi, ComputedValue);
+  if (SimpleRecurrence)
+    PhiEvolutions.emplace_back(SimpleRecurrence.Phi, SimpleRecurrence.BO);
+
+  ValueEvolution VE(TC, *ByteOrderSwapped);
+  if (!VE.computeEvolutions(PhiEvolutions))
+    return VE.getError();
+  KnownBits ResultBits = VE.KnownPhis.at(ConditionalRecurrence.Phi);
+
+  auto IsZero = [](const KnownBits &K) { return K.isZero(); };
+  if (!checkExtractBits(ResultBits, TC, IsZero, *ByteOrderSwapped))
+    return ErrBits(ResultBits, TC, *ByteOrderSwapped);
+
+  const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
+  return PolynomialInfo(TC, ConditionalRecurrence.Start, GenPoly, ComputedValue,
+                        *ByteOrderSwapped, LHSAux);
+}
+
+void CRCTable::print(raw_ostream &OS) const {
+  for (unsigned I = 0; I < 256; I++) {
+    (*this)[I].print(OS, false);
+    OS << (I % 16 == 15 ? '\n' : ' ');
+  }
+}
+
+void HashRecognize::print(raw_ostream &OS) const {
+  if (!L.isInnermost())
+    return;
+  OS << "HashRecognize: Checking a loop in '"
+     << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr()
+     << "\n";
+  auto Ret = recognizeCRC();
+  if (!std::holds_alternative<PolynomialInfo>(Ret)) {
+    OS << "Did not find a hash algorithm\n";
+    if (std::holds_alternative<StringRef>(Ret))
+      OS << "Reason: " << std::get<StringRef>(Ret) << "\n";
+    if (std::holds_alternative<ErrBits>(Ret)) {
+      const auto &[Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(Ret);
+      OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top ")
+         << Iter << " bits zero (";
+      Actual.print(OS);
+      OS << ")\n";
+    }
+    return;
+  }
+
+  const auto &Info = std::get<PolynomialInfo>(Ret);
+  OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian ")
+     << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count "
+     << Info.TripCount << "\n";
+  OS.indent(2) << "Initial CRC: ";
+  Info.LHS->print(OS);
+  OS << "\n";
+  OS.indent(2) << "Generating polynomial: ";
+  Info.RHS.print(OS, false);
+  OS << "\n";
+  OS.indent(2) << "Computed CRC: ";
+  Info.ComputedValue->print(OS);
+  OS << "\n";
+  if (Info.LHSAux) {
+    OS.indent(2) << "Auxiliary data: ";
+    Info.LHSAux->print(OS);
+    OS << "\n";
+  }
+  OS.indent(2) << "Computed CRC lookup table:\n";
+  genSarwateTable(Info.RHS, Info.ByteOrderSwapped).print(OS);
+}
+
+HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE)
+    : L(L), SE(SE) {}
+
+PreservedAnalyses HashRecognizePrinterPass::run(Loop &L,
+                                                LoopAnalysisManager &AM,
+                                                LoopStandardAnalysisResults &AR,
+                                                LPMUpdater &) {
+  AM.getResult<HashRecognizeAnalysis>(L, AR).print(OS);
+  return PreservedAnalyses::all();
+}
+
+HashRecognize HashRecognizeAnalysis::run(Loop &L, LoopAnalysisManager &AM,
+                                         LoopStandardAnalysisResults &AR) {
+  return {L, AR.SE};
+}
+
+AnalysisKey HashRecognizeAnalysis::Key;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 80e0527ddac17..1e4299a0b9803 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++
b/llvm/lib/Passes/PassBuilder.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/EphemeralValuesCache.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/HashRecognize.h" #include "llvm/Analysis/IR2Vec.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/Analysis/IVUsers.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index be93a7b1f5ba6..e2824475ce8c3 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -662,6 +662,7 @@ LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass()) #define LOOP_ANALYSIS(NAME, CREATE_PASS) #endif LOOP_ANALYSIS("ddg", DDGAnalysis()) +LOOP_ANALYSIS("hash-recognize", HashRecognizeAnalysis()) LOOP_ANALYSIS("iv-users", IVUsersAnalysis()) LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) @@ -695,6 +696,7 @@ LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) LOOP_PASS("print", PrintLoopPass(errs())) LOOP_PASS("print", DDGAnalysisPrinterPass(errs())) +LOOP_PASS("print", HashRecognizePrinterPass(errs())) LOOP_PASS("print", IVUsersPrinterPass(errs())) LOOP_PASS("print", LoopCachePrinterPass(errs())) LOOP_PASS("print", LoopNestPrinterPass(errs())) diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll new file mode 100644 index 0000000000000..3e05a9b5c8499 --- /dev/null +++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll @@ -0,0 +1,899 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +define i16 @crc16.le.tc8(i8 %msg, i16 %checksum) { +; CHECK-LABEL: 'crc16.le.tc8' +; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 8 +; CHECK-NEXT: Initial CRC: i16 %checksum 
+; CHECK-NEXT: Generating polynomial: 40961 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor +; CHECK-NEXT: Auxiliary data: i8 %msg +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 50689 1728 1920 51009 1280 50625 50305 1088 +; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265 +; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385 +; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160 +; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529 +; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400 +; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328 +; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409 +; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 42881 26432 42241 26048 25728 42049 +; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688 +; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808 +; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121 +; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568 +; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977 +; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905 +; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448 +; +entry: + br label %loop + +loop: ; preds = %loop, 
%entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i16 %crc to i8 + %xor.data.crc = xor i8 %data, %crc.trunc + %and.data.crc = and i8 %xor.data.crc, 1 + %data.next = lshr i8 %data, 1 + %check.sb = icmp eq i8 %and.data.crc, 0 + %crc.lshr = lshr i16 %crc, 1 + %xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc16.le.tc8.udiv(i8 %msg, i16 %checksum) { +; CHECK-LABEL: 'crc16.le.tc8.udiv' +; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 8 +; CHECK-NEXT: Initial CRC: i16 %checksum +; CHECK-NEXT: Generating polynomial: 40961 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor +; CHECK-NEXT: Auxiliary data: i8 %msg +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 50689 1728 1920 51009 1280 50625 50305 1088 +; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265 +; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385 +; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160 +; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529 +; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400 +; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328 +; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409 +; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 
42881 26432 42241 26048 25728 42049 +; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688 +; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808 +; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121 +; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568 +; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977 +; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905 +; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448 +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i16 %crc to i8 + %xor.data.crc = xor i8 %data, %crc.trunc + %and.data.crc = and i8 %xor.data.crc, 1 + %data.next = udiv i8 %data, 2 + %check.sb = icmp eq i8 %and.data.crc, 0 + %crc.lshr = udiv i16 %crc, 2 + %xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc16.le.tc16(i16 %msg, i16 %checksum) { +; CHECK-LABEL: 'crc16.le.tc16' +; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 16 +; CHECK-NEXT: Initial CRC: i16 %checksum +; CHECK-NEXT: Generating polynomial: 40961 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor +; CHECK-NEXT: Auxiliary data: i16 %msg +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 
50689 1728 1920 51009 1280 50625 50305 1088 +; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265 +; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385 +; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160 +; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529 +; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400 +; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328 +; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409 +; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 42881 26432 42241 26048 25728 42049 +; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688 +; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808 +; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121 +; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568 +; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977 +; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905 +; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448 +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ] + %xor.crc.data = xor i16 %crc, %data + %and.crc.data = and i16 %xor.crc.data, 1 + 
%data.next = lshr i16 %data, 1 + %check.sb = icmp eq i16 %and.crc.data, 0 + %crc.lshr = lshr i16 %crc, 1 + %crc.xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 15 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) { +; CHECK-LABEL: 'crc16.be.tc8.crc.init.li' +; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8 +; CHECK-NEXT: Initial CRC: %crc.init = xor i16 %msg.shl, %checksum +; CHECK-NEXT: Generating polynomial: 4129 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 +; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 +; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 +; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 +; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 18944 19200 19456 19712 19968 20224 +; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 +; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 +; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 +; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 +; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 +; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 +; CHECK-NEXT: 45056 45312 45568 
45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 +; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 +; CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 +; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 +; CHECK-NEXT: 61440 61696 61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; +entry: + %msg.ext = zext i8 %msg to i16 + %msg.shl = shl nuw i16 %msg.ext, 8 + %crc.init = xor i16 %msg.shl, %checksum + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc16.be.tc8.crc.init.arg(i16 %crc.init) { +; CHECK-LABEL: 'crc16.be.tc8.crc.init.arg' +; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8 +; CHECK-NEXT: Initial CRC: i16 %crc.init +; CHECK-NEXT: Generating polynomial: 4129 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 +; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 +; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 +; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 +; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 
18944 19200 19456 19712 19968 20224 +; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 +; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 +; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 +; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 +; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 +; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 +; CHECK-NEXT: 45056 45312 45568 45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 +; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 +; CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 +; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 +; CHECK-NEXT: 61440 61696 61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(i16 %crc.init) { +; CHECK-LABEL: 'crc16.be.tc8.crc.init.arg.flipped.sb.check' +; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8 +; CHECK-NEXT: Initial CRC: i16 
%crc.init +; CHECK-NEXT: Generating polynomial: 4129 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 +; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 +; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 +; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 +; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 18944 19200 19456 19712 19968 20224 +; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 +; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 +; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 +; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 +; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 +; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 +; CHECK-NEXT: 45056 45312 45568 45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 +; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 +; CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 +; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 +; CHECK-NEXT: 61440 61696 61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, 
%entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp sge i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i8 @crc8.be.tc8.ptr.nested.loop(ptr %msg, i32 %loop.limit) { +; CHECK-LABEL: 'crc8.be.tc8.ptr.nested.loop' +; CHECK-NEXT: Found big-endian CRC-8 loop with trip count 8 +; CHECK-NEXT: Initial CRC: %crc.init = xor i8 %msg.load, %crc.outer +; CHECK-NEXT: Generating polynomial: 29 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i8 %crc.xor, i8 %crc.shl +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 29 58 39 116 105 78 83 232 245 210 207 156 129 166 187 +; CHECK-NEXT: 205 208 247 234 185 164 131 158 37 56 31 2 81 76 107 118 +; CHECK-NEXT: 135 154 189 160 243 238 201 212 111 114 85 72 27 6 33 60 +; CHECK-NEXT: 74 87 112 109 62 35 4 25 162 191 152 133 214 203 236 241 +; CHECK-NEXT: 19 14 41 52 103 122 93 64 251 230 193 220 143 146 181 168 +; CHECK-NEXT: 222 195 228 249 170 183 144 141 54 43 12 17 66 95 120 101 +; CHECK-NEXT: 148 137 174 179 224 253 218 199 124 97 70 91 8 21 50 47 +; CHECK-NEXT: 89 68 99 126 45 48 23 10 177 172 139 150 197 216 255 226 +; CHECK-NEXT: 38 59 28 1 82 79 104 117 206 211 244 233 186 167 128 157 +; CHECK-NEXT: 235 246 209 204 159 130 165 184 3 30 57 36 119 106 77 80 +; CHECK-NEXT: 161 188 155 134 213 200 239 242 73 84 115 110 61 32 7 26 +; CHECK-NEXT: 108 113 86 75 24 5 34 63 132 153 190 163 240 237 202 215 +; CHECK-NEXT: 53 40 15 18 65 92 123 102 221 192 231 250 169 180 147 142 +; CHECK-NEXT: 248 229 194 223 140 145 182 171 16 13 42 55 100 121 94 67 +; CHECK-NEXT: 178 175 136 149 198 219 252 225 90 71 96 125 46 51 20 9 +; CHECK-NEXT: 127 98 69 88 11 22 49 44 151 138 173 176 227 254 217 196 +; 
+entry: + br label %outer.loop + +outer.loop: ; preds = %inner.exit, %entry + %crc.outer = phi i8 [ 0, %entry ], [ %crc.next, %inner.exit ] + %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %inner.exit ] + %outer.exit.cond = icmp ult i32 %outer.iv, %loop.limit + br i1 %outer.exit.cond, label %ph, label %exit + +ph: ; preds = %outer.loop + %outer.iv.ext = sext i32 %outer.iv to i64 + %msg.outer.iv = getelementptr inbounds i8, ptr %msg, i64 %outer.iv.ext + %msg.load = load i8, ptr %msg.outer.iv, align 1 + %crc.init = xor i8 %msg.load, %crc.outer + br label %inner.loop + +inner.loop: ; preds = %inner.loop, %ph + %inner.iv = phi i32 [ 0, %ph ], [ %inner.iv.next, %inner.loop ] + %crc = phi i8 [ %crc.init, %ph ], [ %crc.next, %inner.loop ] + %crc.shl = shl i8 %crc, 1 + %crc.xor = xor i8 %crc.shl, 29 + %check.sb = icmp slt i8 %crc, 0 + %crc.next = select i1 %check.sb, i8 %crc.xor, i8 %crc.shl + %inner.iv.next = add nuw nsw i32 %inner.iv, 1 + %exit.cond = icmp samesign ult i32 %inner.iv, 7 + br i1 %exit.cond, label %inner.loop, label %inner.exit + +inner.exit: ; preds = %inner.loop + %outer.iv.next = add i32 %outer.iv, 1 + br label %outer.loop + +exit: ; preds = %outer.loop + ret i8 %crc.outer +} + +define i32 @crc32.le.tc8.data32(i32 %checksum, i32 %msg) { +; CHECK-LABEL: 'crc32.le.tc8.data32' +; CHECK-NEXT: Found little-endian CRC-32 loop with trip count 8 +; CHECK-NEXT: Initial CRC: i32 %checksum +; CHECK-NEXT: Generating polynomial: 33800 +; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor +; CHECK-NEXT: Auxiliary data: i32 %msg +; CHECK-NEXT: Computed CRC lookup table: +; CHECK-NEXT: 0 4489 8978 12955 17956 22445 25910 29887 35912 40385 44890 48851 51820 56293 59774 63735 +; CHECK-NEXT: 4225 264 13203 8730 22181 18220 30135 25662 40137 36160 49115 44626 56045 52068 63999 59510 +; CHECK-NEXT: 8450 12427 528 5017 26406 30383 17460 21949 44362 48323 36440 40913 60270 64231 51324 55797 +; CHECK-NEXT: 12675 8202 4753 792 30631 
26158 21685 17724 48587 44098 40665 36688 64495 60006 55549 51572 +; CHECK-NEXT: 16900 21389 24854 28831 1056 5545 10034 14011 52812 57285 60766 64727 34920 39393 43898 47859 +; CHECK-NEXT: 21125 17164 29079 24606 5281 1320 14259 9786 57037 53060 64991 60502 39145 35168 48123 43634 +; CHECK-NEXT: 25350 29327 16404 20893 9506 13483 1584 6073 61262 65223 52316 56789 43370 47331 35448 39921 +; CHECK-NEXT: 29575 25102 20629 16668 13731 9258 5809 1848 65487 60998 56541 52564 47595 43106 39673 35696 +; CHECK-NEXT: 33800 38273 42778 46739 49708 54181 57662 61623 2112 6601 11090 15067 20068 24557 28022 31999 +; CHECK-NEXT: 38025 34048 47003 42514 53933 49956 61887 57398 6337 2376 15315 10842 24293 20332 32247 27774 +; CHECK-NEXT: 42250 46211 34328 38801 58158 62119 49212 53685 10562 14539 2640 7129 28518 32495 19572 24061 +; CHECK-NEXT: 46475 41986 38553 34576 62383 57894 53437 49460 14787 10314 6865 2904 32743 28270 23797 19836 +; CHECK-NEXT: 50700 55173 58654 62615 32808 37281 41786 45747 19012 23501 26966 30943 3168 7657 12146 16123 +; CHECK-NEXT: 54925 50948 62879 58390 37033 33056 46011 41522 23237 19276 31191 26718 7393 3432 16371 11898 +; CHECK-NEXT: 59150 63111 50204 54677 41258 45219 33336 37809 27462 31439 18516 23005 11618 15595 3696 8185 +; CHECK-NEXT: 63375 58886 54429 50452 45483 40994 37561 33584 31687 27214 22741 18780 15843 11370 7921 3960 +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %xor.crc.data = xor i32 %crc, %data + %sb.crc.data = and i32 %xor.crc.data, 1 + %check.sb = icmp eq i32 %sb.crc.data, 0 + %crc.lshr = lshr i32 %crc, 1 + %crc.xor = xor i32 %crc.lshr, 33800 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %data.next = lshr i32 %data, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, 
label %exit + +exit: ; preds = %loop + ret i32 %crc.next +} + +; Negative tests + +define i16 @not.crc.non.const.tc(i16 %crc.init, i32 %loop.limit) { +; CHECK-LABEL: 'not.crc.non.const.tc' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Unable to find a small constant trip count +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp sge i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, %loop.limit + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.non.canonical.loop(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.non.canonical.loop' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Loop not in canonical form +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 7, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = sub nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign eq i32 %iv, 0 + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.tc.limit(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.tc.limit' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Unable to find a small constant trip count +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + 
%crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 512 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.no.conditional.recurrence(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.no.conditional.recurrence' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Unable to find conditional recurrence +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %shl = shl i16 %crc, 1 + %crc.next = xor i16 %shl, 258 + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + + +define i16 @not.crc.bad.shift.recurrence(i16 %checksum, i8 %msg) { +; CHECK-LABEL: 'not.crc.bad.shift.recurrence' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Loop with non-unit bitshifts +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %crc.lshr = lshr i16 %crc, 8 + %data.ext = zext i8 %data to i16 + %xor.crc.data = xor i16 %crc.lshr, %data.ext + %check.sb = icmp samesign ult i16 %xor.crc.data, 128 + %crc.and = and i16 %crc, 32767 + %crc.xor = xor i16 %crc.and, 258 + %crc.next = select i1 %check.sb, i16 %crc.and, i16 %crc.xor + %data.next = shl i8 %data, 1 + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.nonunit.shifts(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.nonunit.shifts' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Loop with non-unit bitshifts +; 
+entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 2 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.result.unused(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.result.unused' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Unable to find use of computed value in loop exit block +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc +} + +define i16 @not.crc.wrong.sb.check.const(i8 %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.wrong.sb.check.const' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %crc.lshr = lshr i16 %crc, 8 + %data.ext = zext i8 %data to i16 + %xor.crc.data = xor i16 %crc.lshr, %data.ext + %check.sb = icmp samesign ult i16 %xor.crc.data, 128 + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 258 + %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor + 
%data.next = shl i8 %data, 1 + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.wrong.sb.check.pred(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.wrong.sb.check.pred' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Bad RHS of significant-bit-check +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp sgt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.excess.tc(i16 %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.excess.tc' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Loop iterations exceed bitwidth of result +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ] + %xor.crc.data = xor i16 %crc, %data + %and.crc.data = and i16 %xor.crc.data, 1 + %data.next = lshr i16 %data, 1 + %check.sb = icmp eq i16 %and.crc.data, 0 + %crc.lshr = lshr i16 %crc, 1 + %crc.xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 20 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i32 @not.crc.unknown.icmp.rhs(i32 %checksum, i32 %msg, i32 %unknown) { +; CHECK-LABEL: 'not.crc.unknown.icmp.rhs' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: 
Reason: Bad LHS of significant-bit-check +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %xor.crc.data = xor i32 %crc, %data + %sb.crc.data = or i32 %xor.crc.data, 1 + %check.sb = icmp eq i32 %sb.crc.data, %unknown + %crc.lshr = lshr i32 %crc, 1 + %crc.xor = xor i32 %crc.lshr, 33800 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %data.next = lshr i32 %data, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i32 %crc.next +} + +define i32 @not.crc.unknown.icmp.lhs(i32 %checksum, i32 %msg, i32 %unknown) { +; CHECK-LABEL: 'not.crc.unknown.icmp.lhs' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Bad LHS of significant-bit-check +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %xor.crc.data = xor i32 %crc, %data + %sb.crc.data = or i32 %xor.crc.data, %unknown + %check.sb = icmp eq i32 %sb.crc.data, 0 + %crc.lshr = lshr i32 %crc, 1 + %crc.xor = xor i32 %crc.lshr, 33800 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %data.next = lshr i32 %data, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i32 %crc.next +} + + +define i16 @not.crc.stray.or(i16 %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.stray.or' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Bad LHS of significant-bit-check +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry 
], [ %crc.next, %loop ] + %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ] + %xor.crc.data = xor i16 %crc, %data + %and.crc.data = and i16 %xor.crc.data, 1 + %crc.corrupt = or i16 %and.crc.data, 1 + %data.next = lshr i16 %data, 1 + %check.sb = icmp ne i16 %crc.corrupt, 0 + %crc.lshr = lshr i16 %crc, 1 + %crc.xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 15 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.inverse.sb.check(i16 %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.inverse.sb.check' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Expected top 16 bits zero (1100000000000001) +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ] + %xor.crc.data = xor i16 %crc, %data + %and.crc.data = and i16 %xor.crc.data, 1 + %data.next = lshr i16 %data, 1 + %check.sb = icmp ne i16 %and.crc.data, 0 + %crc.lshr = lshr i16 %crc, 1 + %crc.xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 15 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @crc1.tc8.sb.check.endian.mismatch(i8 %msg, i16 %checksum) { +; CHECK-LABEL: 'crc1.tc8.sb.check.endian.mismatch' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Bad RHS of significant-bit-check +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i16 %crc to i8 + 
%xor.data.crc = xor i8 %data, %crc.trunc + %and.data.crc = and i8 %xor.data.crc, 1 + %data.next = mul i8 %data, 2 + %check.sb = icmp eq i8 %and.data.crc, 0 + %crc.lshr = mul i16 %crc, 2 + %xor = xor i16 %crc.lshr, 0 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.init.arg.inverted.select(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.init.arg.inverted.select' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Expected top 8 bits zero (11000000????????) +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %sb.crc = and i16 %crc, 1 + %check.sb = icmp eq i16 %sb.crc, 0 + %crc.lshr = lshr i16 %crc, 1 + %crc.xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.lshr + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i32 @not.crc.dead.msg.bad.use(i32 %checksum, i32 %msg) { +; CHECK-LABEL: 'not.crc.dead.msg.bad.use' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %data.or = or i32 %data, -1 + %xor.crc.data = xor i32 %crc, %data.or + %sb.crc.data = and i32 %xor.crc.data, 1 + %check.sb = icmp eq i32 %sb.crc.data, 0 + %crc.lshr = lshr i32 %crc, 1 + %crc.xor = xor i32 %crc.lshr, 33800 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor + %iv.next = add nuw nsw i8 
%iv, 1 + %data.next = lshr i32 %data, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i32 %crc.next +} + +define i16 @not.crc.dead.msg.no.use(i8 %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.dead.msg.no.use' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i16 %crc to i8 + %and.crc = and i8 %crc.trunc, 1 + %data.next = lshr i8 %data, 1 + %check.sb = icmp eq i8 %and.crc, 0 + %crc.lshr = lshr i16 %crc, 1 + %xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + %data.zext = zext i8 %data.next to i16 + %ret = xor i16 %crc.next, %data.zext + ret i16 %ret +} + +define i32 @not.crc.dead.msg.wrong.op(i32 %checksum, i32 %msg) { +; CHECK-LABEL: 'not.crc.dead.msg.wrong.op' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %or.crc.data = or i32 %crc, %data + %sb.crc.data = and i32 %or.crc.data, 1 + %check.sb = icmp eq i32 %sb.crc.data, 0 + %crc.lshr = lshr i32 %crc, 1 + %crc.xor = xor i32 %crc.lshr, 33800 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor + %iv.next = add nuw nsw i8 %iv, 1 + %data.next = lshr i32 %data, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 
%exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i32 %crc.next +} + +define i16 @not.crc.float.simple.recurrence(float %msg, i16 %checksum) { +; CHECK-LABEL: 'not.crc.float.simple.recurrence' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Found stray PHI +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi float [ %msg, %entry ], [ %data.next, %loop ] + %crc.conv = sitofp i16 %crc to float + %frem.data.crc = frem float %data, %crc.conv + %and.data.crc = fdiv float %frem.data.crc, 2.0 + %data.next = fdiv float %data, 2.0 + %check.sb = fcmp oeq float %and.data.crc, 0.0 + %crc.lshr = lshr i16 %crc, 1 + %xor = xor i16 %crc.lshr, -24575 + %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor + %iv.next = add nuw nsw i8 %iv, 1 + %exit.cond = icmp samesign ult i8 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + ret i16 %crc.next +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 8de408ac3fb38..3d08c3f142db7 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -56,6 +56,7 @@ static_library("Analysis") { "FunctionPropertiesAnalysis.cpp", "GlobalsModRef.cpp", "GuardUtils.cpp", + "HashRecognize.cpp", "HeatUtils.cpp", "IR2Vec.cpp", "IRSimilarityIdentifier.cpp", diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py index 6c194f0923d17..3f14452767f9e 100755 --- a/llvm/utils/update_analyze_test_checks.py +++ b/llvm/utils/update_analyze_test_checks.py @@ -109,10 +109,13 @@ def update_test(opt_basename: str, ti: common.TestInfo): prefixes, ) elif ( - re.search(r"(LV|LDist): Checking a loop in ", raw_tool_outputs) is not None + re.search( + r"(LV|LDist|HashRecognize): Checking a loop in ", 
raw_tool_outputs + ) + is not None ): for raw_tool_output in re.split( - r"(LV|LDist): Checking a loop in ", raw_tool_outputs + r"(LV|LDist|HashRecognize): Checking a loop in ", raw_tool_outputs ): builder.process_run_line( common.LOOP_PASS_DEBUG_RE,