diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h
index d6d8dc531e100..321ce1d26e163 100644
--- a/llvm/include/llvm/FileCheck/FileCheck.h
+++ b/llvm/include/llvm/FileCheck/FileCheck.h
@@ -187,24 +187,14 @@ class FileCheck {
   explicit FileCheck(FileCheckRequest Req);
   ~FileCheck();
 
-  // Combines the check prefixes into a single regex so that we can efficiently
-  // scan for any of the set.
-  //
-  // The semantics are that the longest-match wins which matches our regex
-  // library.
-  Regex buildCheckPrefixRegex();
-
   /// Reads the check file from \p Buffer and records the expected strings it
   /// contains. Errors are reported against \p SM.
   ///
-  /// Only expected strings whose prefix is one of those listed in \p PrefixRE
-  /// are recorded. \returns true in case of an error, false otherwise.
-  ///
   /// If \p ImpPatBufferIDRange, then the range (inclusive start, exclusive end)
   /// of IDs for source buffers added to \p SM for implicit patterns are
   /// recorded in it. The range is empty if there are none.
   bool
-  readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+  readCheckFile(SourceMgr &SM, StringRef Buffer,
                 std::pair<unsigned, unsigned> *ImpPatBufferIDRange = nullptr);
 
   bool ValidateCheckPrefixes();
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index bef6dd69fadfd..ac8f52d586bc3 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -1634,6 +1634,60 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
   return Loc;
 }
 
+static const char *DefaultCheckPrefixes[] = {"CHECK"};
+static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
+
+static void addDefaultPrefixes(FileCheckRequest &Req) {
+  if (Req.CheckPrefixes.empty()) {
+    for (const char *Prefix : DefaultCheckPrefixes)
+      Req.CheckPrefixes.push_back(Prefix);
+    Req.IsDefaultCheckPrefix = true;
+  }
+  if (Req.CommentPrefixes.empty())
+    for (const char *Prefix : DefaultCommentPrefixes)
+      Req.CommentPrefixes.push_back(Prefix);
+}
+
+struct PrefixMatcher {
+  /// Prefixes and their first occurrence past the current position.
+  SmallVector<std::pair<StringRef, size_t>> Prefixes;
+  StringRef Input;
+
+  PrefixMatcher(ArrayRef<StringRef> CheckPrefixes,
+                ArrayRef<StringRef> CommentPrefixes, StringRef Input)
+      : Input(Input) {
+    for (StringRef Prefix : CheckPrefixes)
+      Prefixes.push_back({Prefix, Input.find(Prefix)});
+    for (StringRef Prefix : CommentPrefixes)
+      Prefixes.push_back({Prefix, Input.find(Prefix)});
+
+    // Sort by descending length.
+    llvm::sort(Prefixes,
+               [](auto A, auto B) { return A.first.size() > B.first.size(); });
+  }
+
+  /// Find the next match of a prefix in Buffer.
+  /// Returns empty StringRef if not found.
+  StringRef match(StringRef Buffer) {
+    assert(Buffer.data() >= Input.data() &&
+           Buffer.data() + Buffer.size() == Input.data() + Input.size() &&
+           "Buffer must be suffix of Input");
+
+    size_t From = Buffer.data() - Input.data();
+    StringRef Match;
+    for (auto &[Prefix, Pos] : Prefixes) {
+      // If the last occurrence was before From, find the next one after From.
+      if (Pos < From)
+        Pos = Input.find(Prefix, From);
+      // Find the first prefix with the lowest position.
+      if (Pos != StringRef::npos &&
+          (Match.empty() || size_t(Match.data() - Input.data()) > Pos))
+        Match = StringRef(Input.substr(Pos, Prefix.size()));
+    }
+    return Match;
+  }
+};
+
 /// Searches the buffer for the first prefix in the prefix regular expression.
 ///
 /// This searches the buffer using the provided regular expression, however it
@@ -1658,20 +1712,16 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
 /// is unspecified.
 static std::pair<StringRef, StringRef>
-FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
+FindFirstMatchingPrefix(const FileCheckRequest &Req, PrefixMatcher &Matcher,
                         StringRef &Buffer, unsigned &LineNumber,
                         Check::FileCheckType &CheckTy) {
-  SmallVector<StringRef, 2> Matches;
-
   while (!Buffer.empty()) {
-    // Find the first (longest) match using the RE.
-    if (!PrefixRE.match(Buffer, &Matches))
+    // Find the first (longest) prefix match.
+    StringRef Prefix = Matcher.match(Buffer);
+    if (Prefix.empty())
       // No match at all, bail.
       return {StringRef(), StringRef()};
 
-    StringRef Prefix = Matches[0];
-    Matches.clear();
-
     assert(Prefix.data() >= Buffer.data() &&
            Prefix.data() < Buffer.data() + Buffer.size() &&
            "Prefix doesn't start inside of buffer!");
@@ -1720,7 +1770,7 @@ FileCheck::FileCheck(FileCheckRequest Req)
 FileCheck::~FileCheck() = default;
 
 bool FileCheck::readCheckFile(
-    SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+    SourceMgr &SM, StringRef Buffer,
     std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
   if (ImpPatBufferIDRange)
     ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;
@@ -1769,6 +1819,8 @@ bool FileCheck::readCheckFile(
   // found.
   unsigned LineNumber = 1;
 
+  addDefaultPrefixes(Req);
+  PrefixMatcher Matcher(Req.CheckPrefixes, Req.CommentPrefixes, Buffer);
   std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(),
                                        Req.CheckPrefixes.end());
   const size_t DistinctPrefixes = PrefixesNotFound.size();
@@ -1779,7 +1831,7 @@ bool FileCheck::readCheckFile(
     StringRef UsedPrefix;
     StringRef AfterSuffix;
     std::tie(UsedPrefix, AfterSuffix) =
-        FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
+        FindFirstMatchingPrefix(Req, Matcher, Buffer, LineNumber, CheckTy);
     if (UsedPrefix.empty())
       break;
     if (CheckTy != Check::CheckComment)
@@ -2431,9 +2483,6 @@ static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
   return true;
 }
 
-static const char *DefaultCheckPrefixes[] = {"CHECK"};
-static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
-
 bool FileCheck::ValidateCheckPrefixes() {
   StringSet<> UniquePrefixes;
   // Add default prefixes to catch user-supplied duplicates of them below.
@@ -2454,33 +2503,6 @@ bool FileCheck::ValidateCheckPrefixes() {
   return true;
 }
 
-Regex FileCheck::buildCheckPrefixRegex() {
-  if (Req.CheckPrefixes.empty()) {
-    for (const char *Prefix : DefaultCheckPrefixes)
-      Req.CheckPrefixes.push_back(Prefix);
-    Req.IsDefaultCheckPrefix = true;
-  }
-  if (Req.CommentPrefixes.empty()) {
-    for (const char *Prefix : DefaultCommentPrefixes)
-      Req.CommentPrefixes.push_back(Prefix);
-  }
-
-  // We already validated the contents of CheckPrefixes and CommentPrefixes so
-  // just concatenate them as alternatives.
-  SmallString<32> PrefixRegexStr;
-  for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
-    if (I != 0)
-      PrefixRegexStr.push_back('|');
-    PrefixRegexStr.append(Req.CheckPrefixes[I]);
-  }
-  for (StringRef Prefix : Req.CommentPrefixes) {
-    PrefixRegexStr.push_back('|');
-    PrefixRegexStr.append(Prefix);
-  }
-
-  return Regex(PrefixRegexStr);
-}
-
 Error FileCheckPatternContext::defineCmdlineVariables(
     ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
   assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
diff --git a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h
index 27d599671db6d..06786b15252a0 100644
--- a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h
+++ b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h
@@ -189,8 +189,7 @@ static inline bool CheckMachineFunction(const MachineFunction &MF,
   SourceMgr SM;
   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
                         SMLoc());
-  Regex PrefixRE = FC.buildCheckPrefixRegex();
-  if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
+  if (FC.readCheckFile(SM, CheckFileText))
     return false;
 
   auto OutBuffer = OutputBuf->getBuffer();
diff --git a/llvm/unittests/MIR/MachineMetadata.cpp b/llvm/unittests/MIR/MachineMetadata.cpp
index f50e9b562942b..bea11e4c734fc 100644
--- a/llvm/unittests/MIR/MachineMetadata.cpp
+++ b/llvm/unittests/MIR/MachineMetadata.cpp
@@ -193,8 +193,7 @@ static bool checkOutput(std::string CheckString, std::string Output) {
   SourceMgr SM;
   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"),
                         SMLoc());
-  Regex PrefixRE = FC.buildCheckPrefixRegex();
-  if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
+  if (FC.readCheckFile(SM, CheckFileText))
     return false;
 
   auto OutBuffer = OutputBuffer->getBuffer();
diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp
index 5f85f4f75d13a..e74a79e1312b2 100644
--- a/llvm/utils/FileCheck/FileCheck.cpp
+++ b/llvm/utils/FileCheck/FileCheck.cpp
@@ -810,17 +810,6 @@ int main(int argc, char **argv) {
   if (!FC.ValidateCheckPrefixes())
     return 2;
 
-  Regex PrefixRE = FC.buildCheckPrefixRegex();
-  std::string REError;
-  if (!PrefixRE.isValid(REError)) {
-    errs() << "Unable to combine check-prefix strings into a prefix regular "
-              "expression! This is likely a bug in FileCheck's verification of "
-              "the check-prefix strings. Regular expression parsing failed "
-              "with the following error: "
-           << REError << "\n";
-    return 2;
-  }
-
   SourceMgr SM;
 
   // Read the expected strings from the check file.
@@ -842,7 +831,7 @@ int main(int argc, char **argv) {
                         SMLoc());
 
   std::pair<unsigned, unsigned> ImpPatBufferIDRange;
-  if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange))
+  if (FC.readCheckFile(SM, CheckFileText, &ImpPatBufferIDRange))
     return 2;
 
   // Open the file to check and add it to SourceMgr.