1919
2020import com .codahale .metrics .*;
2121import com .google .common .annotations .VisibleForTesting ;
22+ import com .google .common .collect .ImmutableMap ;
2223import org .apache .hadoop .metrics2 .MetricsCollector ;
2324import org .apache .hadoop .metrics2 .MetricsInfo ;
2425import org .apache .hadoop .metrics2 .MetricsRecordBuilder ;
2526import org .apache .hadoop .metrics2 .MetricsSource ;
2627
2728import java .util .Map ;
29+ import java .util .concurrent .TimeUnit ;
2830
2931/**
3032 * Modeled off of YARN's NodeManagerMetrics.
3133 */
3234public class YarnShuffleServiceMetrics implements MetricsSource {
3335
36+ // Converting from the dropwizard-metrics default of nanoseconds into milliseconds to match how
37+ // MetricsServlet serializes times (to milliseconds) configured via the MetricsModule passed into
38+ // its Jackson ObjectMapper. Without this rate factor applied, the Timer metrics from
39+ // ExternalShuffleBlockHandler#ShuffleMetrics with "Millis" suffixes are misleading, as they
40+ // would otherwise contain values in nanosecond units.
41+ private static final double rateFactor = (double ) TimeUnit .MILLISECONDS .toNanos (1L );
42+
3443 private final MetricSet metricSet ;
3544
3645 public YarnShuffleServiceMetrics (MetricSet metricSet ) {
@@ -52,13 +61,50 @@ public void getMetrics(MetricsCollector collector, boolean all) {
5261 }
5362 }
5463
64+ private static void addSnapshotToMetricRecordBuilder (Snapshot snapshot ,
65+ MetricsRecordBuilder builder ,
66+ String name ,
67+ String metricType ) {
68+
69+ ImmutableMap <String , Double > doubleValues = ImmutableMap .<String , Double >builder ()
70+ .put ("median" , snapshot .getMedian ())
71+ .put ("mean" , snapshot .getMean ())
72+ .put ("75th" , snapshot .get75thPercentile ())
73+ .put ("95th" , snapshot .get95thPercentile ())
74+ .put ("98th" , snapshot .get98thPercentile ())
75+ .put ("99th" , snapshot .get99thPercentile ())
76+ .put ("999th" , snapshot .get999thPercentile ())
77+ .build ();
78+
79+ ImmutableMap <String , Long > longValues = ImmutableMap .<String , Long >builder ()
80+ .put ("min" , snapshot .getMin ())
81+ .put ("max" , snapshot .getMax ())
82+ .build ();
83+
84+ for (Map .Entry <String , Double > entry : doubleValues .entrySet ()) {
85+ builder .addGauge (
86+ new ShuffleServiceMetricsInfo (name + "_" + entry .getKey (),
87+ entry .getKey () + " of " + metricType + " " + name ),
88+ entry .getValue () / rateFactor );
89+ }
90+
91+ for (Map .Entry <String , Long > entry : longValues .entrySet ()) {
92+ builder .addGauge (
93+ new ShuffleServiceMetricsInfo (name + "_" + entry .getKey (),
94+ entry .getKey () + " of " + metricType + " " + name ),
95+ entry .getValue () / rateFactor );
96+ }
97+
98+ }
99+
55100 @ VisibleForTesting
56101 public static void collectMetric (
57102 MetricsRecordBuilder metricsRecordBuilder , String name , Metric metric ) {
58103
59104 // The metric types used in ExternalShuffleBlockHandler.ShuffleMetrics
60105 if (metric instanceof Timer ) {
61106 Timer t = (Timer ) metric ;
107+ Snapshot snapshot = t .getSnapshot ();
62108 metricsRecordBuilder
63109 .addCounter (new ShuffleServiceMetricsInfo (name + "_count" , "Count of timer " + name ),
64110 t .getCount ())
@@ -73,6 +119,7 @@ public static void collectMetric(
73119 t .getOneMinuteRate ())
74120 .addGauge (new ShuffleServiceMetricsInfo (name + "_rateMean" , "Mean rate of timer " + name ),
75121 t .getMeanRate ());
122+ addSnapshotToMetricRecordBuilder (snapshot , metricsRecordBuilder , name , "timer" );
76123 } else if (metric instanceof Meter ) {
77124 Meter m = (Meter ) metric ;
78125 metricsRecordBuilder
0 commit comments