diff --git a/third_party/proton/csrc/include/Data/Metric.h b/third_party/proton/csrc/include/Data/Metric.h index 7be999af54c3..e4da22a1ed91 100644 --- a/third_party/proton/csrc/include/Data/Metric.h +++ b/third_party/proton/csrc/include/Data/Metric.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ class Metric { virtual const std::string &getName() const = 0; - virtual const std::string &getValueName(int valueId) const = 0; + virtual std::string_view getValueName(int valueId) const = 0; virtual bool isProperty(int valueId) const = 0; @@ -81,8 +82,8 @@ class Metric { if (values[valueId].index() != value.index()) { throw makeInvalidArgument( std::string("Metric value type mismatch for valueId ") + - std::to_string(valueId) + " (" + getValueName(valueId) + ")" + - ": current=" + getTypeNameForIndex(values[valueId].index()) + + std::to_string(valueId) + " (" + std::string(getValueName(valueId)) + + ")" + ": current=" + getTypeNameForIndex(values[valueId].index()) + ", new=" + getTypeNameForIndex(value.index())); } // Handle string and other values separately @@ -105,7 +106,8 @@ class Metric { throw makeInvalidArgument( std::string("Vector metric size mismatch for " "valueId ") + - std::to_string(valueId) + " (" + getValueName(valueId) + + std::to_string(valueId) + " (" + + std::string(getValueName(valueId)) + "): current=" + std::to_string(currentValue.size()) + ", new=" + std::to_string(otherValue.size())); } @@ -116,7 +118,8 @@ class Metric { throw makeLogicError( std::string("Metric aggregation not supported for " "valueId ") + - std::to_string(valueId) + " (" + getValueName(valueId) + + std::to_string(valueId) + " (" + + std::string(getValueName(valueId)) + "): type=" + getTypeNameForIndex(values[valueId].index())); } } @@ -172,7 +175,8 @@ class FlexibleMetric : public Metric { const std::string &getName() const override { return name; } - const std::string &getValueName(int valueId) const override { + // Flexible 
metrics carry their name as per-instance state. + std::string_view getValueName(int valueId) const override { return valueName; } @@ -219,7 +223,13 @@ class KernelMetric : public Metric { const std::string &getName() const override { return name; } - const std::string &getValueName(int valueId) const override { + // Fast path for callers that already know they are working with KernelMetric. + static constexpr std::string_view getValueName(kernelMetricKind valueId) { + return VALUE_NAMES[valueId]; + } + + // Virtual access used through the Metric interface. + std::string_view getValueName(int valueId) const override { return VALUE_NAMES[valueId]; } @@ -232,7 +242,7 @@ class KernelMetric : public Metric { true, true, false, false, true, true, true, true}; const static inline bool EXCLUSIVE[kernelMetricKind::Count] = { false, false, false, false, true, true, true, true}; - const static inline std::string VALUE_NAMES[kernelMetricKind::Count] = { + static constexpr std::string_view VALUE_NAMES[kernelMetricKind::Count] = { "start_time (ns)", "end_time (ns)", "count", "time (ns)", "device_id", "device_type", "stream_id", "is_metric_kernel", }; @@ -278,14 +288,21 @@ class PCSamplingMetric : public Metric { const std::string &getName() const override { return name; } - const std::string &getValueName(int valueId) const override { + // Fast path for callers that already know they are working with + // PCSamplingMetric. + static constexpr std::string_view getValueName(PCSamplingMetricKind valueId) { + return VALUE_NAMES[valueId]; + } + + // Virtual access used through the Metric interface. 
+ std::string_view getValueName(int valueId) const override { return VALUE_NAMES[valueId]; } bool isProperty(int valueId) const override { return false; } bool isExclusive(int valueId) const override { return false; } - const static inline std::string VALUE_NAMES[PCSamplingMetricKind::Count] = { + static constexpr std::string_view VALUE_NAMES[PCSamplingMetricKind::Count] = { "num_samples", "num_stalled_samples", "stalled_branch_resolving", @@ -359,7 +376,13 @@ class CycleMetric : public Metric { const std::string &getName() const override { return name; } - const std::string &getValueName(int valueId) const override { + // Fast path for callers that already know they are working with CycleMetric. + static constexpr std::string_view getValueName(CycleMetricKind valueId) { + return VALUE_NAMES[valueId]; + } + + // Virtual access used through the Metric interface. + std::string_view getValueName(int valueId) const override { return VALUE_NAMES[valueId]; } @@ -374,7 +397,7 @@ class CycleMetric : public Metric { const static inline bool EXCLUSIVE[CycleMetricKind::Count] = { false, false, true, true, true, true, true, true, true, true, true, true, false, false, false}; - const static inline std::string VALUE_NAMES[CycleMetricKind::Count] = { + static constexpr std::string_view VALUE_NAMES[CycleMetricKind::Count] = { "start_cycle", "end_cycle", "cycles", "normalized_cycles", "kernel_id", "kernel_name", "block_id", "processor_id", "unit_id", "device_id", "device_type", "time_shift_cost", diff --git a/third_party/proton/csrc/include/Profiler/Graph.h b/third_party/proton/csrc/include/Profiler/Graph.h index a4491da51b07..3cea6e70ae32 100644 --- a/third_party/proton/csrc/include/Profiler/Graph.h +++ b/third_party/proton/csrc/include/Profiler/Graph.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -45,6 +44,7 @@ struct GraphState { // Capture tag to identify captured call paths static constexpr const char *captureTag = ""; static constexpr 
const char *metricTag = ""; + struct NodeState { // The graph node id for this node uint64_t nodeId{}; diff --git a/third_party/proton/csrc/include/Utility/MsgPackWriter.h b/third_party/proton/csrc/include/Utility/MsgPackWriter.h index 639b4cdfad2a..b37e3efbe954 100644 --- a/third_party/proton/csrc/include/Utility/MsgPackWriter.h +++ b/third_party/proton/csrc/include/Utility/MsgPackWriter.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -21,6 +22,17 @@ class MsgPackWriter { void packInt(int64_t value); void packDouble(double value); void packStr(std::string_view value); + template void packFixStrLiteral(const char (&value)[N]) { + static_assert(N > 0); + constexpr uint32_t size = static_cast(N - 1); + // MsgPack fixstr stores the string length in 5 bits, so literals must fit + // in the 0..31 byte range. + static_assert(size <= 31); + out.push_back(static_cast(0xa0 | size)); + const auto offset = out.size(); + out.resize(offset + size); + std::memcpy(out.data() + offset, value, size); + } void packArray(uint32_t size); void packMap(uint32_t size); diff --git a/third_party/proton/csrc/lib/Data/TreeData.cpp b/third_party/proton/csrc/lib/Data/TreeData.cpp index 381c50e0ed4c..16a92c1c972c 100644 --- a/third_party/proton/csrc/lib/Data/TreeData.cpp +++ b/third_party/proton/csrc/lib/Data/TreeData.cpp @@ -3,21 +3,24 @@ #include "Data/Metric.h" #include "Device.h" #include "DeviceType.h" +#include "Profiler/Graph.h" #include "Utility/Errors.h" #include "Utility/MsgPackWriter.h" +#include #include #include #include #include #include #include +#include #include #include -#include #include #include #include #include +#include #include namespace proton { @@ -159,9 +162,10 @@ class TreeData::Tree { const FlexibleMetric &flexibleMetric) { auto &node = treeNodeMap.at(contextId); auto &flexibleMetrics = node.metricSet.flexibleMetrics; - auto it = flexibleMetrics.find(flexibleMetric.getValueName(0)); + auto valueName = std::string(flexibleMetric.getValueName(0)); + 
auto it = flexibleMetrics.find(valueName); if (it == flexibleMetrics.end()) { - flexibleMetrics.emplace(flexibleMetric.getValueName(0), flexibleMetric); + flexibleMetrics.emplace(std::move(valueName), flexibleMetric); } else { it->second.updateMetric(flexibleMetric); } @@ -218,7 +222,8 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, output.push_back(json::object()); jsonNodes[TreeData::Tree::TreeNode::RootId] = &(output.back()); MetricSummary metricSummary; - const auto &virtualRootNode = virtualTree->getNode(Tree::TreeNode::RootId); + // Append fixed-schema metrics to a JSON metrics object and update device + // metadata requirements while visiting them. auto appendMetrics = [&](json &metricsJson, const std::map> &metrics) { @@ -234,26 +239,25 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, std::get(kernelMetric->getValue(KernelMetric::DeviceId)); uint64_t deviceType = std::get( kernelMetric->getValue(KernelMetric::DeviceType)); + metricSummary.hasKernelMetric = true; + metricSummary.updateDeviceIdMask(deviceType, deviceId); const auto &deviceTypeName = getDeviceTypeString(static_cast(deviceType)); - const auto &durationName = - kernelMetric->getValueName(KernelMetric::Duration); - const auto &invocationsName = - kernelMetric->getValueName(KernelMetric::Invocations); - const auto &deviceIdName = - kernelMetric->getValueName(KernelMetric::DeviceId); - const auto &deviceTypeNameKey = - kernelMetric->getValueName(KernelMetric::DeviceType); const auto deviceIdStr = std::to_string(deviceId); - metricsJson[durationName] = duration; - metricsJson[invocationsName] = invocations; - metricsJson[deviceIdName] = deviceIdStr; - metricsJson[deviceTypeNameKey] = deviceTypeName; + metricsJson[KernelMetric::getValueName(KernelMetric::Duration)] = + duration; + metricsJson[KernelMetric::getValueName(KernelMetric::Invocations)] = + invocations; + metricsJson[KernelMetric::getValueName(KernelMetric::DeviceId)] = + deviceIdStr; + 
metricsJson[KernelMetric::getValueName(KernelMetric::DeviceType)] = + deviceTypeName; } else if (metricKind == MetricKind::PCSampling) { auto *pcSamplingMetric = static_cast(metric.get()); for (size_t i = 0; i < PCSamplingMetric::Count; i++) { - const auto &valueName = pcSamplingMetric->getValueName(i); + const auto valueName = PCSamplingMetric::getValueName( + static_cast(i)); std::visit([&](auto &&value) { metricsJson[valueName] = value; }, pcSamplingMetric->getValues()[i]); } @@ -267,21 +271,17 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, std::get(cycleMetric->getValue(CycleMetric::DeviceId)); uint64_t deviceType = std::get(cycleMetric->getValue(CycleMetric::DeviceType)); - const auto &durationName = - cycleMetric->getValueName(CycleMetric::Duration); - const auto &normalizedDurationName = - cycleMetric->getValueName(CycleMetric::NormalizedDuration); - const auto &deviceIdName = - cycleMetric->getValueName(CycleMetric::DeviceId); - const auto &deviceTypeName = - cycleMetric->getValueName(CycleMetric::DeviceType); const auto deviceIdStr = std::to_string(deviceId); const auto deviceTypeStr = std::to_string(deviceType); - metricsJson[durationName] = duration; - metricsJson[normalizedDurationName] = normalizedDuration; - metricsJson[deviceIdName] = deviceIdStr; - metricsJson[deviceTypeName] = deviceTypeStr; + metricsJson[CycleMetric::getValueName(CycleMetric::Duration)] = + duration; + metricsJson[CycleMetric::getValueName( + CycleMetric::NormalizedDuration)] = normalizedDuration; + metricsJson[CycleMetric::getValueName(CycleMetric::DeviceId)] = + deviceIdStr; + metricsJson[CycleMetric::getValueName(CycleMetric::DeviceType)] = + deviceTypeStr; } else if (metricKind == MetricKind::Flexible) { // Flexible metrics are handled in a different way } else { @@ -289,11 +289,13 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, } } }; + // Append user-defined flexible metrics, preserving scalar and vector value + // types in the JSON output. 
auto appendFlexibleMetrics = [&](json &metricsJson, const std::map &flexibleMetrics) { for (const auto &[_, flexibleMetric] : flexibleMetrics) { - const auto &valueName = flexibleMetric.getValueName(0); + const auto valueName = flexibleMetric.getValueName(0); std::visit( [&](auto &&v) { using T = std::decay_t; @@ -305,9 +307,8 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, } else if constexpr (std::is_same_v> || std::is_same_v> || std::is_same_v>) { - metricsJson[valueName] = json::array(); - auto &arr = metricsJson[valueName]; - arr.get_ref().reserve(v.size()); + auto &arr = metricsJson[valueName] = json::array(); + arr.template get_ref().reserve(v.size()); for (const auto &value : v) { arr.push_back(value); } @@ -320,7 +321,7 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, }; tree->template walk( [&](TreeData::Tree::TreeNode &treeNode) { - const auto contextName = treeNode.name; + const auto &contextName = treeNode.name; auto contextId = treeNode.id; json *jsonNode = jsonNodes[contextId]; (*jsonNode)["frame"] = {{"name", contextName}, {"type", "function"}}; @@ -330,6 +331,8 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, appendFlexibleMetrics(metricsJson, treeNode.metricSet.flexibleMetrics); auto &childrenArray = (*jsonNode)["children"]; childrenArray = json::array(); + const auto &virtualRootNode = + virtualTree->getNode(Tree::TreeNode::RootId); const bool hasLinkedTargets = !treeNode.metricSet.linkedMetrics.empty() || !treeNode.metricSet.linkedFlexibleMetrics.empty(); @@ -343,6 +346,8 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, if (!hasLinkedTargets) { return; } + // JSON dumping is not the performance-critical path, so use a direct + // recursive copy of the linked virtual tree. 
std::function appendLinkedVirtualNode = [&](size_t virtualNodeId, json &outNode, json &parentMetricsJson) { const auto &virtualNode = virtualTree->getNode(virtualNodeId); @@ -357,10 +362,12 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, if (metricsIt != treeNode.metricSet.linkedMetrics.end()) { appendMetrics(outNode["metrics"], metricsIt->second); } - // Linked flexible metrics are only attached to - // children, so they always belong on the parent frame. + // Linked flexible metrics are attached to generated + // helper nodes, but they belong on the helper's parent frame. + // Other linked virtual nodes should not carry flexible metrics. if (flexibleIt != - treeNode.metricSet.linkedFlexibleMetrics.end()) { + treeNode.metricSet.linkedFlexibleMetrics.end() && + virtualNode.name == GraphState::metricTag) { appendFlexibleMetrics(parentMetricsJson, flexibleIt->second); } outNode["children"] = json::array(); @@ -374,32 +381,29 @@ json TreeData::buildHatchetJson(TreeData::Tree *tree, } }; - for (const auto &virtualChild : virtualRootNode.children) { + for (const auto &child : virtualRootNode.children) { json linkedRootChildNode; - appendLinkedVirtualNode(virtualChild.id, linkedRootChildNode, - metricsJson); + appendLinkedVirtualNode(child.id, linkedRootChildNode, metricsJson); childrenArray.push_back(std::move(linkedRootChildNode)); } }); if (metricSummary.hasKernelMetric) { - KernelMetric kernelMetric; output[TreeData::Tree::TreeNode::RootId]["metrics"] - [kernelMetric.getValueName(KernelMetric::Invocations)] = 0; + [KernelMetric::getValueName(KernelMetric::Invocations)] = 0; output[TreeData::Tree::TreeNode::RootId]["metrics"] - [kernelMetric.getValueName(KernelMetric::Duration)] = 0; + [KernelMetric::getValueName(KernelMetric::Duration)] = 0; } if (metricSummary.hasCycleMetric) { - CycleMetric cycleMetric; output[TreeData::Tree::TreeNode::RootId]["metrics"] - [cycleMetric.getValueName(CycleMetric::Duration)] = 0; + 
[CycleMetric::getValueName(CycleMetric::Duration)] = 0; output[TreeData::Tree::TreeNode::RootId]["metrics"] - [cycleMetric.getValueName(CycleMetric::NormalizedDuration)] = 0; + [CycleMetric::getValueName(CycleMetric::NormalizedDuration)] = 0; } if (metricSummary.hasPCSamplingMetric) { - PCSamplingMetric pcSamplingMetric; for (size_t i = 0; i < PCSamplingMetric::Count; i++) { - const auto &valueName = pcSamplingMetric.getValueName(i); + const auto valueName = PCSamplingMetric::getValueName( + static_cast(i)); output[TreeData::Tree::TreeNode::RootId]["metrics"][valueName] = 0; } } @@ -445,9 +449,9 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, writer.reserve(16 * 1024 * 1024); // 16 MB MetricSummary metricSummary; - const std::map> emptyMetrics; - const auto &virtualRootNode = virtualTree->getNode(Tree::TreeNode::RootId); - + // Root metrics are serialized before descendants, so first scan the whole + // concrete tree for fixed-schema metric kinds. This lets the root emit the + // zero-valued Hatchet fields required for any metric kind present below it. 
tree->template walk( [&](TreeData::Tree::TreeNode &treeNode) { metricSummary.observeMetrics(treeNode.metricSet.metrics); @@ -456,77 +460,28 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, metricSummary.observeMetrics(linkedMetrics); } }); - - // We only need these metrics for tree data - KernelMetric kernelMetric; - auto &kernelMetricDurationName = - kernelMetric.getValueName(KernelMetric::Duration); - auto &kernelMetricInvocationsName = - kernelMetric.getValueName(KernelMetric::Invocations); - auto &kernelMetricDeviceIdName = - kernelMetric.getValueName(KernelMetric::DeviceId); - auto &kernelMetricDeviceTypeName = - kernelMetric.getValueName(KernelMetric::DeviceType); - CycleMetric cycleMetric; - auto &cycleMetricDurationName = - cycleMetric.getValueName(CycleMetric::Duration); - auto &cycleMetricNormalizedDurationName = - cycleMetric.getValueName(CycleMetric::NormalizedDuration); - auto &cycleMetricDeviceIdName = - cycleMetric.getValueName(CycleMetric::DeviceId); - auto &cycleMetricDeviceTypeName = - cycleMetric.getValueName(CycleMetric::DeviceType); - std::set kernelInclusiveValueNames = { - kernelMetricDurationName, kernelMetricInvocationsName}; - std::set kernelExclusiveValueNames = { - kernelMetricDeviceIdName, kernelMetricDeviceTypeName}; - std::set cycleInclusiveValueNames = { - cycleMetricDurationName, cycleMetricNormalizedDurationName}; - std::set cycleExclusiveValueNames = {cycleMetricDeviceIdName, - cycleMetricDeviceTypeName}; - const auto kernelInclusiveCount = - static_cast(kernelInclusiveValueNames.size()); - const auto kernelTotalCount = static_cast( - kernelInclusiveValueNames.size() + kernelExclusiveValueNames.size()); - const auto cycleInclusiveCount = - static_cast(cycleInclusiveValueNames.size()); - const auto cycleTotalCount = static_cast( - cycleInclusiveValueNames.size() + cycleExclusiveValueNames.size()); - - auto packFlexibleMetricValue = [&](const MetricValueType &value) { - std::visit( - [&](auto &&v) { - using T = std::decay_t; - 
if constexpr (std::is_same_v) { - writer.packUInt(v); - } else if constexpr (std::is_same_v) { - writer.packInt(v); - } else if constexpr (std::is_same_v) { - writer.packDouble(v); - } else if constexpr (std::is_same_v) { - writer.packStr(v); - } else if constexpr (std::is_same_v>) { - writer.packArray(static_cast(v.size())); - for (auto value : v) { - writer.packUInt(value); - } - } else if constexpr (std::is_same_v>) { - writer.packArray(static_cast(v.size())); - for (auto value : v) { - writer.packInt(value); - } - } else if constexpr (std::is_same_v>) { - writer.packArray(static_cast(v.size())); - for (auto value : v) { - writer.packDouble(value); - } - } else { - static_assert(sizeof(T) == 0, "Unsupported MetricValueType"); - } - }, - value); + const auto &virtualRootNode = virtualTree->getNode(Tree::TreeNode::RootId); + auto packHatchetFrameHeader = [&](std::string_view name) { + writer.packMap(3); + writer.packFixStrLiteral("frame"); + writer.packMap(2); + writer.packFixStrLiteral("name"); + writer.packStr(name); + writer.packFixStrLiteral("type"); + writer.packFixStrLiteral("function"); + writer.packFixStrLiteral("metrics"); }; + // Root metrics only carry inclusive aggregate fields. Non-root metrics also + // include device_id and device_type, so their serialized map entry counts are + // larger. + constexpr uint32_t kernelInclusiveCount = 2; // duration, count + constexpr uint32_t kernelTotalCount = 4; // + device_id, device_type + constexpr uint32_t cycleInclusiveCount = 2; // duration, normalized_duration + constexpr uint32_t cycleTotalCount = 4; // + device_id, device_type + + // Count the exact number of key/value entries needed for a MsgPack metrics + // map before writing it. 
auto countMetricEntries = [&](const std::map> &metrics, bool isRoot) -> uint32_t { @@ -560,46 +515,49 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, } return metricEntries; }; - auto countFlexibleMetricEntries = - [&](const std::map &flexibleMetrics) - -> uint32_t { return static_cast(flexibleMetrics.size()); }; + // Pack the four fields emitted for a concrete kernel metric. + auto packKernelMetricValues = [&](const KernelMetric *kernelMetric) { + uint64_t duration = + std::get(kernelMetric->getValue(KernelMetric::Duration)); + uint64_t invocations = + std::get(kernelMetric->getValue(KernelMetric::Invocations)); + uint64_t deviceId = + std::get(kernelMetric->getValue(KernelMetric::DeviceId)); + uint64_t deviceType = + std::get(kernelMetric->getValue(KernelMetric::DeviceType)); + metricSummary.updateDeviceIdMask(deviceType, deviceId); + const auto &deviceTypeName = + getDeviceTypeString(static_cast(deviceType)); + writer.packStr(KernelMetric::getValueName(KernelMetric::Duration)); + writer.packUInt(duration); + writer.packStr(KernelMetric::getValueName(KernelMetric::Invocations)); + writer.packUInt(invocations); + writer.packStr(KernelMetric::getValueName(KernelMetric::DeviceId)); + writer.packStr(std::to_string(deviceId)); + writer.packStr(KernelMetric::getValueName(KernelMetric::DeviceType)); + writer.packStr(deviceTypeName); + }; + // Pack all fixed-schema metrics for one frame. Root frames emit zero-valued + // inclusive placeholders for any metric type observed elsewhere. 
auto packMetrics = [&](const std::map> &metrics, bool isRoot) { for (const auto &[metricKind, metric] : metrics) { if (metricKind == MetricKind::Kernel) { if (isRoot) { - writer.packStr(kernelMetricDurationName); + writer.packStr(KernelMetric::getValueName(KernelMetric::Duration)); writer.packUInt(0); - writer.packStr(kernelMetricInvocationsName); + writer.packStr(KernelMetric::getValueName(KernelMetric::Invocations)); writer.packUInt(0); continue; } - auto *kernelMetric = static_cast(metric.get()); - uint64_t duration = - std::get(kernelMetric->getValue(KernelMetric::Duration)); - uint64_t invocations = std::get( - kernelMetric->getValue(KernelMetric::Invocations)); - uint64_t deviceId = - std::get(kernelMetric->getValue(KernelMetric::DeviceId)); - uint64_t deviceType = std::get( - kernelMetric->getValue(KernelMetric::DeviceType)); - const auto &deviceTypeName = - getDeviceTypeString(static_cast(deviceType)); - writer.packStr(kernelMetricDurationName); - writer.packUInt(duration); - writer.packStr(kernelMetricInvocationsName); - writer.packUInt(invocations); - writer.packStr(kernelMetricDeviceIdName); - writer.packStr(std::to_string(deviceId)); - writer.packStr(kernelMetricDeviceTypeName); - writer.packStr(deviceTypeName); + packKernelMetricValues(static_cast(metric.get())); } else if (metricKind == MetricKind::PCSampling) { auto *pcSamplingMetric = static_cast(metric.get()); for (size_t i = 0; i < PCSamplingMetric::Count; i++) { - const auto &valueName = pcSamplingMetric->getValueName(i); + const auto valueName = pcSamplingMetric->getValueName(i); writer.packStr(valueName); if (isRoot) { writer.packUInt(0); @@ -610,9 +568,10 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, } } else if (metricKind == MetricKind::Cycle) { if (isRoot) { - writer.packStr(cycleMetricDurationName); + writer.packStr(CycleMetric::getValueName(CycleMetric::Duration)); writer.packUInt(0); - writer.packStr(cycleMetricNormalizedDurationName); + writer.packStr( + 
CycleMetric::getValueName(CycleMetric::NormalizedDuration)); writer.packUInt(0); continue; } @@ -626,14 +585,16 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, std::get(cycleMetric->getValue(CycleMetric::DeviceId)); uint64_t deviceType = std::get(cycleMetric->getValue(CycleMetric::DeviceType)); + metricSummary.updateDeviceIdMask(deviceType, deviceId); - writer.packStr(cycleMetricDurationName); + writer.packStr(CycleMetric::getValueName(CycleMetric::Duration)); writer.packUInt(duration); - writer.packStr(cycleMetricNormalizedDurationName); + writer.packStr( + CycleMetric::getValueName(CycleMetric::NormalizedDuration)); writer.packDouble(normalizedDuration); - writer.packStr(cycleMetricDeviceIdName); + writer.packStr(CycleMetric::getValueName(CycleMetric::DeviceId)); writer.packStr(std::to_string(deviceId)); - writer.packStr(cycleMetricDeviceTypeName); + writer.packStr(CycleMetric::getValueName(CycleMetric::DeviceType)); writer.packStr(std::to_string(deviceType)); } else { throw makeLogicError("MetricKind not supported"); @@ -642,142 +603,149 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, if (isRoot) { if (metricSummary.hasKernelMetric && metrics.find(MetricKind::Kernel) == metrics.end()) { - writer.packStr(kernelMetricDurationName); + writer.packStr(KernelMetric::getValueName(KernelMetric::Duration)); writer.packUInt(0); - writer.packStr(kernelMetricInvocationsName); + writer.packStr(KernelMetric::getValueName(KernelMetric::Invocations)); writer.packUInt(0); } if (metricSummary.hasPCSamplingMetric && metrics.find(MetricKind::PCSampling) == metrics.end()) { PCSamplingMetric pcSamplingMetric; for (size_t i = 0; i < PCSamplingMetric::Count; i++) { - const auto &valueName = pcSamplingMetric.getValueName(i); + const auto valueName = pcSamplingMetric.getValueName(i); writer.packStr(valueName); writer.packUInt(0); } } if (metricSummary.hasCycleMetric && metrics.find(MetricKind::Cycle) == metrics.end()) { - writer.packStr(cycleMetricDurationName); + 
writer.packStr(CycleMetric::getValueName(CycleMetric::Duration)); writer.packUInt(0); - writer.packStr(cycleMetricNormalizedDurationName); + writer.packStr( + CycleMetric::getValueName(CycleMetric::NormalizedDuration)); writer.packUInt(0); } } }; + // Pack user-defined flexible metrics in MsgPack, preserving scalar and vector + // value types. auto packFlexibleMetrics = [&](const std::map &flexibleMetrics) { for (const auto &[_, flexibleMetric] : flexibleMetrics) { - const auto &valueName = flexibleMetric.getValueName(0); + const auto valueName = flexibleMetric.getValueName(0); writer.packStr(valueName); - packFlexibleMetricValue(flexibleMetric.getValues()[0]); + std::visit( + [&](auto &&v) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + writer.packUInt(v); + } else if constexpr (std::is_same_v) { + writer.packInt(v); + } else if constexpr (std::is_same_v) { + writer.packDouble(v); + } else if constexpr (std::is_same_v) { + writer.packStr(v); + } else if constexpr (std::is_same_v>) { + writer.packArray(static_cast(v.size())); + for (auto value : v) { + writer.packUInt(value); + } + } else if constexpr (std::is_same_v>) { + writer.packArray(static_cast(v.size())); + for (auto value : v) { + writer.packInt(value); + } + } else if constexpr (std::is_same_v>) { + writer.packArray(static_cast(v.size())); + for (auto value : v) { + writer.packDouble(value); + } + } else { + static_assert(sizeof(T) == 0, "Unsupported MetricValueType"); + } + }, + flexibleMetric.getValues()[0]); } }; - auto countPromotedFlexibleMetricEntries = - [&](const auto &children, - const DataEntry::LinkedFlexibleMetricMap &linkedFlexibleMetrics) - -> uint32_t { - uint32_t metricEntries = 0; - for (const auto &child : children) { - auto it = linkedFlexibleMetrics.find(child.id); - if (it != linkedFlexibleMetrics.end()) { - metricEntries += countFlexibleMetricEntries(it->second); + auto packNode = [&](auto &&packNode, + TreeData::Tree::TreeNode &treeNode) -> void { + // Write the 
header + packHatchetFrameHeader(treeNode.name); + const bool isRoot = treeNode.id == TreeData::Tree::TreeNode::RootId; + // Write the concrete node's own metrics and flexible metrics + writer.packMap( + countMetricEntries(treeNode.metricSet.metrics, isRoot) + + static_cast(treeNode.metricSet.flexibleMetrics.size())); + packMetrics(treeNode.metricSet.metrics, isRoot); + packFlexibleMetrics(treeNode.metricSet.flexibleMetrics); + + auto packLinkedVirtualNode = [&](auto &&packLinkedVirtualNode, + size_t virtualNodeId) -> void { + const auto &virtualNode = virtualTree->getNode(virtualNodeId); + auto &linkedMetrics = treeNode.metricSet.linkedMetrics; + auto &linkedFlexibleMetrics = treeNode.metricSet.linkedFlexibleMetrics; + // Write the header + packHatchetFrameHeader(virtualNode.name); + // Count linked metrics + auto metricEntries = 0u; + const auto metricsIt = linkedMetrics.find(virtualNodeId); + if (metricsIt != linkedMetrics.end()) { + metricEntries += + countMetricEntries(metricsIt->second, /*isRoot=*/false); } - } - return metricEntries; - }; - auto packPromotedFlexibleMetrics = - [&](const auto &children, - const DataEntry::LinkedFlexibleMetricMap &linkedFlexibleMetrics) { - for (const auto &child : children) { + // Count the linked flexible metrics that exist in the child helpers + if (!linkedFlexibleMetrics.empty()) { + for (const auto &child : virtualNode.children) { + auto it = linkedFlexibleMetrics.find(child.id); + if (it != linkedFlexibleMetrics.end()) { + metricEntries += static_cast(it->second.size()); + } + } + } + // Pack + writer.packMap(metricEntries); + if (metricsIt != treeNode.metricSet.linkedMetrics.end()) { + packMetrics(metricsIt->second, /*isRoot=*/false); + } + if (!linkedFlexibleMetrics.empty()) { + for (const auto &child : virtualNode.children) { auto it = linkedFlexibleMetrics.find(child.id); if (it != linkedFlexibleMetrics.end()) { packFlexibleMetrics(it->second); } } - }; - std::function packNode = - [&](TreeData::Tree::TreeNode &treeNode) { -
writer.packMap(3); - - writer.packStr("frame"); - writer.packMap(2); - writer.packStr("name"); - writer.packStr(treeNode.name); - writer.packStr("type"); - writer.packStr("function"); - - writer.packStr("metrics"); - const bool isRoot = treeNode.id == TreeData::Tree::TreeNode::RootId; - writer.packMap( - countMetricEntries(treeNode.metricSet.metrics, isRoot) + - countFlexibleMetricEntries(treeNode.metricSet.flexibleMetrics) + - countPromotedFlexibleMetricEntries( - virtualRootNode.children, - treeNode.metricSet.linkedFlexibleMetrics)); - packMetrics(treeNode.metricSet.metrics, isRoot); - packFlexibleMetrics(treeNode.metricSet.flexibleMetrics); - packPromotedFlexibleMetrics(virtualRootNode.children, - treeNode.metricSet.linkedFlexibleMetrics); - const bool hasLinkedTargets = - !treeNode.metricSet.linkedMetrics.empty() || - !treeNode.metricSet.linkedFlexibleMetrics.empty(); - - std::function packLinkedVirtualNode = - [&](size_t virtualNodeId) { - const auto &virtualNode = virtualTree->getNode(virtualNodeId); - writer.packMap(3); - - writer.packStr("frame"); - writer.packMap(2); - writer.packStr("name"); - writer.packStr(virtualNode.name); - writer.packStr("type"); - writer.packStr("function"); + } + writer.packFixStrLiteral("children"); + writer.packArray(static_cast(virtualNode.children.size())); + for (const auto &child : virtualNode.children) { + packLinkedVirtualNode(packLinkedVirtualNode, child.id); + } + }; - writer.packStr("metrics"); - const auto metricsIt = - treeNode.metricSet.linkedMetrics.find(virtualNodeId); - const auto &linkedMetrics = - (metricsIt != treeNode.metricSet.linkedMetrics.end()) - ? 
metricsIt->second - : emptyMetrics; - writer.packMap( - countMetricEntries(linkedMetrics, /*isRoot=*/false) + - countPromotedFlexibleMetricEntries( - virtualNode.children, - treeNode.metricSet.linkedFlexibleMetrics)); - packMetrics(linkedMetrics, /*isRoot=*/false); - // Linked flexible metrics are only attached to - // children, so they are always packed into the parent frame. - packPromotedFlexibleMetrics( - virtualNode.children, - treeNode.metricSet.linkedFlexibleMetrics); - - writer.packStr("children"); - writer.packArray( - static_cast(virtualNode.children.size())); - for (const auto &child : virtualNode.children) { - packLinkedVirtualNode(child.id); - } - }; + const bool hasLinkedTargets = + !treeNode.metricSet.linkedMetrics.empty() || + !treeNode.metricSet.linkedFlexibleMetrics.empty(); + uint32_t linkedChildCount = + hasLinkedTargets + ? static_cast(virtualRootNode.children.size()) + : 0; + writer.packFixStrLiteral("children"); + writer.packArray(static_cast(treeNode.children.size()) + + linkedChildCount); + for (const auto &child : treeNode.children) { + packNode(packNode, tree->getNode(child.id)); + } + if (hasLinkedTargets) { + for (const auto &virtualChild : virtualRootNode.children) { + packLinkedVirtualNode(packLinkedVirtualNode, virtualChild.id); + } + } + }; - uint32_t linkedChildCount = - hasLinkedTargets - ? static_cast(virtualRootNode.children.size()) - : 0; - writer.packStr("children"); - writer.packArray(static_cast(treeNode.children.size()) + - linkedChildCount); - for (const auto &child : treeNode.children) { - packNode(tree->getNode(child.id)); - } - if (hasLinkedTargets) { - for (const auto &virtualChild : virtualRootNode.children) { - packLinkedVirtualNode(virtualChild.id); - } - } - }; + // Hatchet format: [tree, device_metadata]. Always emit 2 elements to match + // the JSON serializer, even if device_metadata is empty. 
+ writer.packArray(2); + packNode(packNode, tree->getNode(TreeData::Tree::TreeNode::RootId)); uint32_t deviceTypeEntries = 0; for (size_t deviceType = 0; @@ -786,19 +754,6 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, ++deviceTypeEntries; } } - // Hatchet format: [tree, device_metadata]. Always emit 2 elements to match - // the JSON serializer, even if device_metadata is empty. - writer.packArray(2); - packNode(tree->getNode(TreeData::Tree::TreeNode::RootId)); - - auto countSetBits = [](uint32_t mask) -> uint32_t { - uint32_t count = 0; - while (mask) { - mask &= (mask - 1); - ++count; - } - return count; - }; writer.packMap(deviceTypeEntries); for (size_t deviceType = 0; @@ -812,7 +767,11 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, getDeviceTypeString(static_cast(deviceType)); writer.packStr(deviceTypeName); - writer.packMap(countSetBits(mask)); + uint32_t deviceIdEntries = 0; + for (auto remaining = mask; remaining != 0; remaining &= (remaining - 1)) { + ++deviceIdEntries; + } + writer.packMap(deviceIdEntries); for (uint64_t deviceId = 0; deviceId < kMaxRegisteredDeviceIds; ++deviceId) { if ((mask & (1u << static_cast(deviceId))) == 0) { @@ -821,15 +780,15 @@ TreeData::buildHatchetMsgPack(TreeData::Tree *tree, Device device = getDevice(static_cast(deviceType), deviceId); writer.packStr(std::to_string(deviceId)); writer.packMap(5); - writer.packStr("clock_rate"); + writer.packFixStrLiteral("clock_rate"); writer.packUInt(device.clockRate); - writer.packStr("memory_clock_rate"); + writer.packFixStrLiteral("memory_clock_rate"); writer.packUInt(device.memoryClockRate); - writer.packStr("bus_width"); + writer.packFixStrLiteral("bus_width"); writer.packUInt(device.busWidth); - writer.packStr("arch"); + writer.packFixStrLiteral("arch"); writer.packStr(device.arch); - writer.packStr("num_sms"); + writer.packFixStrLiteral("num_sms"); writer.packUInt(device.numSms); } } diff --git a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp 
b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp index 488b78773bc4..3924d8e3de86 100644 --- a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp +++ b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp @@ -103,7 +103,7 @@ size_t matchStallReasonsToIndices( std::string::npos; std::string cuptiStallName = std::string(stallReasonNames[i]); for (size_t j = 0; j < PCSamplingMetric::PCSamplingMetricKind::Count; j++) { - auto metricName = PCSamplingMetric().getValueName(j); + auto metricName = std::string(PCSamplingMetric().getValueName(j)); if (cuptiStallName.find(metricName) != std::string::npos) { if (notIssued) notIssuedStallReasonIndices.insert(stallReasonIndices[i]); diff --git a/third_party/proton/csrc/lib/Utility/MsgPackWriter.cpp b/third_party/proton/csrc/lib/Utility/MsgPackWriter.cpp index 65980c9746b7..8d89dbe9c3f3 100644 --- a/third_party/proton/csrc/lib/Utility/MsgPackWriter.cpp +++ b/third_party/proton/csrc/lib/Utility/MsgPackWriter.cpp @@ -88,7 +88,9 @@ void MsgPackWriter::packStr(std::string_view value) { out.push_back(0xdb); writeBE(out, static_cast(size)); } - out.insert(out.end(), value.begin(), value.end()); + const auto offset = out.size(); + out.resize(offset + size); + std::memcpy(out.data() + offset, value.data(), size); } void MsgPackWriter::packArray(uint32_t size) {