diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index b393c85321b7d..509f9aedda801 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -140,6 +140,8 @@ struct BinaryBasicBlockProfile {
   std::vector CallSites;
   std::vector Successors;
   std::vector PseudoProbes;
+  /// Compact string form of PseudoProbes, mapped to the "probe" YAML key.
+  std::string PseudoProbesStr;
 
   bool operator==(const BinaryBasicBlockProfile &Other) const {
     return Index == Other.Index;
@@ -163,6 +165,7 @@ template <> struct MappingTraits {
                        std::vector());
     YamlIO.mapOptional("probes", BBP.PseudoProbes,
                        std::vector());
+    YamlIO.mapOptional("probe", BBP.PseudoProbesStr, std::string());
   }
 };
 
@@ -207,6 +210,8 @@ struct BinaryFunctionProfile {
   uint64_t ExternEntryCount{0};
   std::vector Blocks;
   std::vector InlineTree;
+  /// Compact string form of InlineTree, mapped to the "ppit" YAML key.
+  std::string InlineTreeStr;
   bool Used{false};
 };
 } // end namespace bolt
@@ -223,6 +228,7 @@ template <> struct MappingTraits {
                        std::vector());
     YamlIO.mapOptional("inline_tree", BFP.InlineTree,
                        std::vector());
+    YamlIO.mapOptional("ppit", BFP.InlineTreeStr, std::string());
   }
 };
 
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 9a7d5f6a1b5b8..fada73eff75e4 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -41,10 +41,12 @@ class YAMLProfileWriter {
     GUIDNumMap HashIdxMap;
   };
 
-  static std::tuple, InlineTreeMapTy>
-  convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
-                      const InlineTreeDesc &InlineTree,
-                      const BinaryFunction &BF);
+  /// Build the probe inline tree of \p BF and store it into \p YamlBF: as
+  /// InlineTree in default mode, as InlineTreeStr in compact mode. Returns the
+  /// InlineTreeMapTy map filled while converting the tree.
+  static InlineTreeMapTy convertBFInlineTree(
+      const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree,
+      const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF);
 
   static std::tuple
   convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 5c7f1b94315f0..33321e30a8ff0 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -134,6 +134,8 @@ enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };
 
 extern llvm::cl::bits GadgetScannersToRun;
 
+/// Value of -profile-write-pseudo-probes: off, default or compact encoding.
+enum ProbesWriteMode : char { PWM_None = 0, PWM_Default, PWM_Compact };
 } // namespace opts
 
 namespace llvm {
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index ed20ff3941cee..930f971c29bf2 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -114,7 +114,7 @@ MaxSamples("max-samples",
            cl::cat(AggregatorCategory));
 
 extern cl::opt ProfileFormat;
-extern cl::opt ProfileWritePseudoProbes;
+extern cl::opt ProfileWritePseudoProbes;
 extern cl::opt SaveProfile;
 
 cl::opt ReadPreAggregated(
@@ -2387,9 +2387,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
 
       if (PseudoProbeDecoder) {
         DenseMap InlineTreeNodeId;
-        std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
-            YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
-                                                   InlineTree, *BF);
+        InlineTreeNodeId = YAMLProfileWriter::convertBFInlineTree(
+            *PseudoProbeDecoder, InlineTree, *BF, YamlBF);
         // Fetch probes belonging to all fragments
         const AddressProbesMap &ProbeMap =
             PseudoProbeDecoder->getAddress2ProbesMap();
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index d773490723b2e..edc443b04b7f5 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
 
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "bolt-prof"
@@ -369,7 +370,29 @@ bool YAMLProfileReader::parseFunctionProfile(
 // Probe inline tree: move InlineTreeIndex into InlineTreeNodes.
 void YAMLProfileReader::decodeYamlInlineTree(
     yaml::bolt::BinaryFunctionProfile &YamlBF) {
-  const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
+  if (YamlBF.InlineTree.empty() && YamlBF.InlineTreeStr.empty())
+    return;
+  // Decompress the compact inline tree: nodes are separated by ' ' and each
+  // node is "GUIDIndex_ParentIndexDelta_CallSiteProbe" in base 36. The writer
+  // leaves GUIDIndex empty for UINT32_MAX and the other fields empty for 0.
+  SmallVector Fields;
+  if (!YamlBF.InlineTreeStr.empty()) {
+    for (StringRef NodeStr : llvm::split(YamlBF.InlineTreeStr, ' ')) {
+      yaml::bolt::InlineTreeNode &Node = YamlBF.InlineTree.emplace_back();
+      // StringRef::split appends to its output vector, so reset it for every
+      // node; pad it so that a truncated node cannot index out of bounds.
+      Fields.clear();
+      NodeStr.split(Fields, '_');
+      Fields.resize(3);
+      if (Fields[0].empty())
+        Node.GUIDIndex = UINT32_MAX;
+      else
+        Fields[0].getAsInteger(36, Node.GUIDIndex);
+      // The second field is the value the writer takes from ParentIndexDelta.
+      Fields[1].getAsInteger(36, Node.ParentIndexDelta);
+      Fields[2].getAsInteger(36, Node.CallSiteProbe);
+    }
+  }
   uint32_t ParentId = 0;
   uint32_t PrevGUIDIdx = 0;
   for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlBF.InlineTree) {
@@ -390,6 +413,45 @@ void YAMLProfileReader::decodeYamlInlineTree(
     for (const yaml::bolt::PseudoProbeInfo &PI : YamlBB.PseudoProbes)
       for (const uint64_t Node : PI.InlineTreeNodes)
         YamlGUIDs.emplace(YamlBF.InlineTree[Node].GUID);
+  // Decompress probe descriptors. A block's compact string holds ' '-separated
+  // entries "BlockProbes_InlineTreeNodes"; each list is a ','-separated
+  // sequence of base-36 runs "Start" or "Start^Extra", where Extra is the
+  // number of consecutive values that follow Start.
+  auto decompressList = [](StringRef List, auto &Vec) {
+    for (StringRef BlockStr : llvm::split(List, ',')) {
+      StringRef ValStr, RepStr;
+      uint64_t Val = 0, Rep = 0;
+      std::tie(ValStr, RepStr) = BlockStr.split('^');
+      ValStr.getAsInteger(36, Val);
+      RepStr.getAsInteger(36, Rep);
+      // llvm::seq is half-open: emit Val itself plus its Rep followers.
+      llvm::copy(llvm::seq(Val, Val + Rep + 1), std::back_inserter(Vec));
+    }
+  };
+  // An empty field stands for the single-element list {Default}.
+  auto decompressField = [&](StringRef Field, auto &Vec, uint32_t Default) {
+    if (Field.empty())
+      Vec.emplace_back(Default);
+    else
+      decompressList(Field, Vec);
+  };
+  for (yaml::bolt::BinaryBasicBlockProfile &BB : YamlBF.Blocks) {
+    if (BB.PseudoProbesStr.empty())
+      continue;
+    for (StringRef ProbeStr : llvm::split(BB.PseudoProbesStr, ' ')) {
+      yaml::bolt::PseudoProbeInfo &PI = BB.PseudoProbes.emplace_back();
+      Fields.clear();
+      ProbeStr.split(Fields, '_');
+      Fields.resize(2);
+      decompressField(Fields[0], PI.BlockProbes, 1);
+      decompressField(Fields[1], PI.InlineTreeNodes, 0);
+      // The GUID collection loop above ran before these probes were decoded,
+      // so register the GUIDs of the freshly decoded nodes here.
+      for (const uint64_t Node : PI.InlineTreeNodes)
+        if (Node < YamlBF.InlineTree.size())
+          YamlGUIDs.emplace(YamlBF.InlineTree[Node].GUID);
+    }
+  }
 }
 
 Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 9d6a3983c3d03..aad433a8b1cf8 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
 #include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -25,10 +26,14 @@ namespace opts {
 using namespace llvm;
 
 extern cl::opt ProfileUseDFS;
-cl::opt ProfileWritePseudoProbes(
+cl::opt ProfileWritePseudoProbes(
     "profile-write-pseudo-probes",
-    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
-    cl::cat(BoltOptCategory));
+    cl::desc("Write pseudo probes into YAML profile"),
+    cl::init(ProbesWriteMode::PWM_None),
+    cl::values(clEnumValN(ProbesWriteMode::PWM_Default, "", "default"),
+               clEnumValN(ProbesWriteMode::PWM_Compact, "compact",
+                          "compressed encoding")),
+    cl::ValueOptional, cl::cat(BoltOutputCategory));
 } // namespace opts
 
 namespace llvm {
@@ -149,6 +154,40 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
     }
   };
 
+  // Append a compact encoding of the sorted list \p Vec to \p Out: every
+  // maximal run of consecutive values is written in base 36 as "Start" or
+  // "Start^Extra" (Extra = run length - 1), runs are separated by ','. The
+  // single-element list {1} (if \p Probes) or {0} (otherwise) is encoded as
+  // an empty string.
+  auto compressVec = [](const auto &Vec, std::string &Out, bool Probes) {
+    if (Vec.size() == 1) {
+      if (Probes ? Vec.front() == 1 : Vec.front() == 0)
+        return;
+    }
+    // Invariant: current run is [StartGroup, EndGroup)
+    auto StartGroup = Vec.begin();
+    auto EndGroup = Vec.begin();
+    ListSeparator LS(",");
+    while (StartGroup != Vec.end()) {
+      ++EndGroup;
+      const size_t Distance = EndGroup - StartGroup;
+      // Test for the end first: EndGroup must not be dereferenced there.
+      const bool AtEnd = EndGroup == Vec.end();
+      // Happy case: advance the group
+      if (!AtEnd && *EndGroup - *StartGroup == Distance)
+        continue;
+      // Otherwise print the current group
+      Out += LS;
+      Out += toString(APInt(64, *StartGroup), 36, false);
+      if (Distance > 1) {
+        Out += '^';
+        Out += toString(APInt(64, Distance - 1), 36, false);
+      }
+      // Begin the next run; when AtEnd this makes the loop condition fail.
+      StartGroup = EndGroup;
+    }
+  };
+
   // Check identical block probes and merge them
   std::unordered_map, std::vector, ProbeHasher>
       ProbesToNodes;
@@ -156,10 +195,19 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
     llvm::sort(Probes);
     ProbesToNodes[Probes].emplace_back(NodeId);
   }
+  ListSeparator LS(" ");
+  std::string &Probe = YamlBB.PseudoProbesStr;
   for (auto &[Probes, Nodes] : ProbesToNodes) {
     llvm::sort(Nodes);
-    YamlBB.PseudoProbes.emplace_back(
-        yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+    if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) {
+      YamlBB.PseudoProbes.emplace_back(
+          yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+    } else {
+      Probe += LS;
+      compressVec(Probes, Probe, true);
+      Probe += "_";
+      compressVec(Nodes, Probe, false);
+    }
   }
   for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
     auto It = CallProbes.find(CSI.Offset);
@@ -183,18 +231,19 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
   }
 }
 
-std::tuple,
-           YAMLProfileWriter::InlineTreeMapTy>
-YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
-                                       const InlineTreeDesc &InlineTree,
-                                       const BinaryFunction &BF) {
+/// Convert the probe inline tree of \p BF and store it into \p YamlBF, as
+/// InlineTree in default mode or as InlineTreeStr in compact mode. Returns
+/// InlineTreeNodeId; \p YamlBF is left untouched if \p BF has no probes.
+YAMLProfileWriter::InlineTreeMapTy YAMLProfileWriter::convertBFInlineTree(
+    const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree,
+    const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) {
   DenseMap InlineTreeNodeId;
   std::vector YamlInlineTree;
   uint64_t Addr = BF.getAddress();
   uint64_t Size = BF.getSize();
   auto Probes = Decoder.getAddress2ProbesMap().find(Addr, Addr + Size);
   if (Probes.empty())
-    return {YamlInlineTree, InlineTreeNodeId};
+    return InlineTreeNodeId;
   const MCDecodedPseudoProbe &Probe = *Probes.begin();
   const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode();
   while (Root->hasInlineSite())
@@ -215,7 +264,28 @@ YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
         Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
     PrevParent = Node.ParentId;
   }
-  return {YamlInlineTree, InlineTreeNodeId};
+  if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) {
+    YamlBF.InlineTree = std::move(YamlInlineTree);
+    return InlineTreeNodeId;
+  }
+  assert(opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Compact);
+  // Write compact form: ' '-separated nodes, each encoded in base 36 as
+  // "GUIDIndex_ParentIndexDelta_CallSiteProbe". GUIDIndex is left empty when
+  // it is UINT32_MAX, the other two fields when they are zero.
+  std::string &Out = YamlBF.InlineTreeStr;
+  ListSeparator LS(" ");
+  for (const yaml::bolt::InlineTreeNode &Node : YamlInlineTree) {
+    Out += LS;
+    if (Node.GUIDIndex != UINT32_MAX)
+      Out += toString(APInt(32, Node.GUIDIndex), 36, false);
+    Out += '_';
+    if (Node.ParentIndexDelta)
+      Out += toString(APInt(32, Node.ParentIndexDelta), 36, false);
+    Out += '_';
+    if (Node.CallSiteProbe)
+      Out += toString(APInt(32, Node.CallSiteProbe), 36, false);
+  }
+  return InlineTreeNodeId;
 }
 
 yaml::bolt::BinaryFunctionProfile
@@ -241,8 +311,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   YamlBF.ExternEntryCount = BF.getExternEntryCount();
   DenseMap InlineTreeNodeId;
   if (PseudoProbeDecoder)
-    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
-        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF);
+    InlineTreeNodeId =
+        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF, YamlBF);
 
   BinaryFunction::BasicBlockOrderType Order;
   llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
@@ -418,7 +488,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
   // Add probe inline tree nodes.
   InlineTreeDesc InlineTree;
   if (const MCPseudoProbeDecoder *Decoder =
-          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
     std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
 
   // Add all function objects.
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 947d8992890d4..cc0103261560c 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -50,7 +50,7 @@ static cl::opt PrintPseudoProbes(
                clEnumValN(PPP_All, "all", "enable all debugging printout")),
     cl::Hidden, cl::cat(BoltCategory));
 
-extern cl::opt ProfileWritePseudoProbes;
+extern cl::opt ProfileWritePseudoProbes;
 extern cl::opt StaleMatchingWithPseudoProbes;
 } // namespace opts