8 changes: 4 additions & 4 deletions src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs
@@ -66,11 +66,11 @@ public CrossValRunner(MLContext context,

        private static double CalcAverageScore(IEnumerable<double> scores)
        {
-            if (scores.Any(s => double.IsNaN(s)))
-            {
+            var newScores = scores.Where(r => !double.IsNaN(r));
+            // Return NaN iff all scores are NaN
+            if (newScores.Count() == 0)
                 return double.NaN;
-            }
-            return scores.Average();
+            return newScores.Average();
        }
    }
}
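
For illustration, the effect of this change on a couple of inputs (a standalone sketch mirroring the new logic, not part of the PR):

using System;
using System.Collections.Generic;
using System.Linq;

class AverageScoreDemo
{
    // Mirrors the new CalcAverageScore logic, for illustration only.
    static double CalcAverageScore(IEnumerable<double> scores)
    {
        var newScores = scores.Where(s => !double.IsNaN(s));
        // Return NaN iff all scores are NaN.
        return newScores.Any() ? newScores.Average() : double.NaN;
    }

    static void Main()
    {
        Console.WriteLine(CalcAverageScore(new[] { 0.8, double.NaN, 0.6 }));   // 0.7 (NaN fold excluded)
        Console.WriteLine(CalcAverageScore(new[] { double.NaN, double.NaN })); // NaN (all folds NaN)
    }
}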
CrossValSummaryRunner.cs

@@ -6,6 +6,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
+using Microsoft.ML.Data;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.AutoML
@@ -70,27 +71,105 @@ public CrossValSummaryRunner(MLContext context,

            // Get the model from the best fold
            var bestFoldIndex = BestResultUtil.GetIndexOfBestScore(trainResults.Select(r => r.score), _optimizingMetricInfo.IsMaximizing);
+            // bestFoldIndex will be -1 if the optimization metric for all folds is NaN.
+            // In this case, return model from the first fold.
+            bestFoldIndex = bestFoldIndex != -1 ? bestFoldIndex : 0;
justinormont (Contributor) commented on Apr 16, 2020:
I'd also look into places where the metric gets compared.

For instance, below in GetIndexClosestToAverage():

private static int GetIndexClosestToAverage(IEnumerable<double> values, double average)
{
    int avgFoldIndex = -1;
    var smallestDistFromAvg = double.PositiveInfinity;
    for (var i = 0; i < values.Count(); i++)
    {
        var distFromAvg = Math.Abs(values.ElementAt(i) - average);
        if (distFromAvg < smallestDistFromAvg || smallestDistFromAvg == double.PositiveInfinity)
        {
            smallestDistFromAvg = distFromAvg;
            avgFoldIndex = i;
        }
    }
    return avgFoldIndex;
}

While GetIndexClosestToAverage() could be adjusted to handle CV folds returning NaN, it would be better to remove the function and instead return a new instance of the metric class with the actual averages. The current function was created before the AutoML code could create new instances of the metric classes, so it just returned the fold closest to the average of the folds.

I would also look a bit further at where AutoML compares the metrics returned from each model in the sweep, when it chooses which is best. The comparison to NaN may also be there. #Resolved

najeeb-kazmi (Member Author) commented on Apr 17, 2020:

I have changed the calculation of the average to exclude any folds with NaN metrics. In finding the best run, there are no comparisons to NaN, but an index of -1 is returned if all the runs have a NaN metric. I have added a warning there checking for that case.

As for returning a Metrics object containing the averages of the metrics across the folds, I think this would be out of scope for this particular issue. Also, since we only have the type TMetrics here, creating the right metric would be fairly involved. #Resolved

justinormont (Contributor):

Thanks. Looks good.

In the future, we can use the MetricsStatistics class to do the averaging so that we're not duplicating functionality. It has the benefit of handling the PerClassLogLoss, though I suspect it's not properly handling the class ordering differences.

/// <summary>
/// The <see cref="RegressionMetricsStatistics"/> class holds summary
/// statistics over multiple observations of <see cref="RegressionMetrics"/>.
/// </summary>
public sealed class RegressionMetricsStatistics : IMetricsStatistics<RegressionMetrics>
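
For illustration, the core of what such a statistics class does (a standalone sketch of the pattern; the type and member names here are hypothetical stand-ins, not the Microsoft.ML.Data API):

using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical stand-in for a MetricStatistics-style accumulator: collect one
// metric across CV folds and expose summary statistics rather than a bare average.
public sealed class MetricStatisticsSketch
{
    private readonly List<double> _observations = new List<double>();

    public void Add(double value) => _observations.Add(value);

    public double Mean => _observations.Average();

    public double StandardDeviation =>
        _observations.Count > 1
            ? Math.Sqrt(_observations.Sum(x => Math.Pow(x - Mean, 2)) / (_observations.Count - 1))
            : 0;
}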

            var bestModel = trainResults.ElementAt(bestFoldIndex).model;

-            // Get the metrics from the fold whose score is closest to avg of all fold scores
-            var avgScore = trainResults.Average(r => r.score);
+            // Get the average metrics across all folds
+            var avgScore = GetAverageOfNonNaNScores(trainResults.Select(x => x.score));
            var indexClosestToAvg = GetIndexClosestToAverage(trainResults.Select(r => r.score), avgScore);
            var metricsClosestToAvg = trainResults[indexClosestToAvg].metrics;
+            var avgMetrics = GetAverageMetrics(trainResults.Select(x => x.metrics), metricsClosestToAvg);

            // Build result objects
-            var suggestedPipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, avgScore, allRunsSucceeded, metricsClosestToAvg, bestModel, null);
+            var suggestedPipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, avgScore, allRunsSucceeded, avgMetrics, bestModel, null);
            var runDetail = suggestedPipelineRunDetail.ToIterationResult(_preFeaturizer);
            return (suggestedPipelineRunDetail, runDetail);
        }

+        private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetrics metricsClosestToAvg)
+        {
+            if (typeof(TMetrics) == typeof(BinaryClassificationMetrics))
+            {
+                var newMetrics = metrics.Select(x => x as BinaryClassificationMetrics);
+                Contracts.Assert(newMetrics != null);
+
+                var result = new BinaryClassificationMetrics(
+                    auc: GetAverageOfNonNaNScores(newMetrics.Select(x => x.AreaUnderRocCurve)),
+                    accuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.Accuracy)),
+                    positivePrecision: GetAverageOfNonNaNScores(newMetrics.Select(x => x.PositivePrecision)),
+                    positiveRecall: GetAverageOfNonNaNScores(newMetrics.Select(x => x.PositiveRecall)),
+                    negativePrecision: GetAverageOfNonNaNScores(newMetrics.Select(x => x.NegativePrecision)),
+                    negativeRecall: GetAverageOfNonNaNScores(newMetrics.Select(x => x.NegativeRecall)),
+                    f1Score: GetAverageOfNonNaNScores(newMetrics.Select(x => x.F1Score)),
+                    auprc: GetAverageOfNonNaNScores(newMetrics.Select(x => x.AreaUnderPrecisionRecallCurve)),
+                    // Return ConfusionMatrix from the fold closest to average score
+                    confusionMatrix: (metricsClosestToAvg as BinaryClassificationMetrics).ConfusionMatrix);
+                return result as TMetrics;
+            }
+
+            if (typeof(TMetrics) == typeof(MulticlassClassificationMetrics))
+            {
+                var newMetrics = metrics.Select(x => x as MulticlassClassificationMetrics);
+                Contracts.Assert(newMetrics != null);
+
+                var result = new MulticlassClassificationMetrics(
+                    accuracyMicro: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MicroAccuracy)),
+                    accuracyMacro: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MacroAccuracy)),
+                    logLoss: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLoss)),
+                    logLossReduction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLossReduction)),
+                    topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
+                    topKAccuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.TopKAccuracy)),
+                    // Return PerClassLogLoss and ConfusionMatrix from the fold closest to average score
+                    perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
+                    confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix);
+                return result as TMetrics;
+            }
+
+            if (typeof(TMetrics) == typeof(RegressionMetrics))
+            {
+                var newMetrics = metrics.Select(x => x as RegressionMetrics);
+                Contracts.Assert(newMetrics != null);
+
+                var result = new RegressionMetrics(
+                    l1: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MeanAbsoluteError)),
+                    l2: GetAverageOfNonNaNScores(newMetrics.Select(x => x.MeanSquaredError)),
+                    rms: GetAverageOfNonNaNScores(newMetrics.Select(x => x.RootMeanSquaredError)),
+                    lossFunction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LossFunction)),
+                    rSquared: GetAverageOfNonNaNScores(newMetrics.Select(x => x.RSquared)));
+                return result as TMetrics;
+            }
+
+            throw new NotImplementedException($"Metric {typeof(TMetrics)} not implemented");
+        }
+
+        private static double GetAverageOfNonNaNScores(IEnumerable<double> results)
+        {
+            var newResults = results.Where(r => !double.IsNaN(r));
justinormont (Contributor):

What are your thoughts on metrics that can be +/-Infinity? Should they be included in the average or not? Any trade-offs?

Current behavior:

  • If the input includes only double.PositiveInfinity xor double.NegativeInfinity, GetAverageOfNonNaNScores() returns average = +/-Inf
  • If the input includes both double.PositiveInfinity and double.NegativeInfinity, GetAverageOfNonNaNScores() returns average = NaN
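
A quick standalone check of that behavior (this is just LINQ's Average over doubles, nothing AutoML-specific):

using System;
using System.Linq;

class InfinityAverageDemo
{
    static void Main()
    {
        // Only +Inf present: the sum, and hence the average, is +Inf.
        Console.WriteLine(new[] { 1.0, double.PositiveInfinity }.Average());                     // Infinity

        // Both +Inf and -Inf present: +Inf + -Inf is NaN, so the average is NaN.
        Console.WriteLine(new[] { double.PositiveInfinity, double.NegativeInfinity }.Average()); // NaN
    }
}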

najeeb-kazmi (Member Author):

So I've looked at the calculation of LogLoss, which is the main metric I would be concerned about being infinity. It looks to me that it can only be Double.PositiveInfinity if the aggregation over all the rows in the evaluation set overflows Double.MaxValue.

Here's the code for binary classification:

return Double.IsNaN(_logLoss) ? Double.NaN : (_numLogLossPositives + _numLogLossNegatives > 0)
    ? _logLoss / (_numLogLossPositives + _numLogLossNegatives) : 0;

This is only Double.PositiveInfinity if _logLoss is, which will only be the case if it overflows here, because logloss itself cannot be Double.PositiveInfinity: prob will never be 0 if the label is positive, and prob will never be 1 if the label is negative.

Double logloss;
if (!Single.IsNaN(prob))
{
    if (_label > 0)
    {
        // REVIEW: Should we bring back the option to use ln instead of log2?
        logloss = -Math.Log(prob, 2);
    }
    else
        logloss = -Math.Log(1.0 - prob, 2);
}
else
    logloss = Double.NaN;
UnweightedCounters.Update(_score, prob, _label, logloss, 1);

This does mean that if prob is NaN for any row, then logloss will be NaN for that row, and _logLoss for the entire evaluation set will be NaN.
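
That propagation is plain IEEE 754 arithmetic; a minimal illustration:

using System;
using System.Linq;

class NaNPropagationDemo
{
    static void Main()
    {
        // A single NaN term poisons the running sum, and therefore the final metric.
        var perRowLogLoss = new[] { 0.32, double.NaN, 0.15 };
        Console.WriteLine(perRowLogLoss.Sum()); // NaN
    }
}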

najeeb-kazmi (Member Author):

Similarly for multiclass classification:

LogLoss will only be Double.PositiveInfinity if _totalLogLoss is:

public double LogLoss { get { return _numInstances > 0 ? _totalLogLoss / _numInstances : 0; } }

and that will only be the case if it overflows here, because loglossCurr itself will never be Double.PositiveInfinity, as it can only be as large as -Math.Log(Epsilon), which is approximately 34.54 (see the quick check after the snippet below):

double logloss;
if (intLabel < _scoresArr.Length)
{
    // REVIEW: This assumes that the predictions are probabilities, not just relative scores
    // for the classes. Is this a correct assumption?
    float p = Math.Min(1, Math.Max(Epsilon, _scoresArr[intLabel]));
    logloss = -Math.Log(p);
}
else
{
    // Penalize logloss if the label was not seen during training
    logloss = -Math.Log(Epsilon);
    _numUnknownClassInstances++;
}
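
A quick numeric check of that bound, assuming the evaluator's Epsilon is 1e-15 (the constant value is an assumption here, chosen to be consistent with the 34.54 quoted above):

using System;

class LogLossBoundCheck
{
    static void Main()
    {
        const double Epsilon = 1e-15; // assumed evaluator constant
        Console.WriteLine(-Math.Log(Epsilon)); // 34.538776394910684
    }
}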

najeeb-kazmi (Member Author):

I agree with the principle of reporting NaN log loss if a single row has NaN probability and log loss: if there is a problem, the user needs to know via the metric. While it may be fine to suppress this for a handful of rows, it leads to questions about what to do when many or most rows have this problem. So I think it is better not to suppress NaNs in the calculation of total log loss.

By the same principle, we should not be suppressing infinities in the metrics across the folds. If one fold has an infinity, the average returned should be infinity.

justinormont (Contributor):

For log-loss, the Infinity occurs when the model is perfectly confident (p=0 or p=1) and wrong, for any of the predicted labels in the scoring dataset.

I created a dotnet fiddle to show this:
https://dotnetfiddle.net/DPIn5Y

It's demonstrating the code for binary log-loss:

Double logloss;
if (!Single.IsNaN(prob))
{
    if (_label > 0)
    {
        // REVIEW: Should we bring back the option to use ln instead of log2?
        logloss = -Math.Log(prob, 2);
    }
    else
        logloss = -Math.Log(1.0 - prob, 2);
}
else
    logloss = Double.NaN;
UnweightedCounters.Update(_score, prob, _label, logloss, 1);
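
A minimal standalone reproduction of the Infinity case (perfectly confident and wrong), along the lines of the fiddle:

using System;

class LogLossInfinityDemo
{
    static void Main()
    {
        // Positive true label, but the model is perfectly confident it is negative (prob = 0).
        float prob = 0f;
        double logloss = -Math.Log(prob, 2); // -log2(0) = +Infinity
        Console.WriteLine(logloss);          // Infinity
    }
}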

najeeb-kazmi (Member Author):

Ah yes, that's the true label, not the predicted label.

justinormont (Contributor):

I filed an issue to investigate whether we should threshold the probability for log-loss in binary classification. This would keep the AutoML code (discussed above) from receiving an Infinity value.

Issue: #5055 Investigate thresholding binary log-loss
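
A hypothetical sketch of what such thresholding could look like (the clamp and epsilon value are illustrative assumptions, not necessarily the approach #5055 will take):

using System;

class ThresholdedLogLossSketch
{
    // Hypothetical: clamp the probability away from 0 and 1 before taking the log,
    // so a perfectly confident wrong prediction yields a large finite penalty.
    static double BinaryLogLoss(double prob, bool positiveLabel, double epsilon = 1e-15)
    {
        var p = Math.Min(1 - epsilon, Math.Max(epsilon, prob));
        return positiveLabel ? -Math.Log(p, 2) : -Math.Log(1 - p, 2);
    }

    static void Main()
    {
        Console.WriteLine(BinaryLogLoss(0.0, positiveLabel: true)); // ~49.8 instead of Infinity
    }
}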

+            // Return NaN iff all scores are NaN
+            if (newResults.Count() == 0)
+                return double.NaN;
+            // Return average of non-NaN scores otherwise
+            return newResults.Average(r => r);
+        }

        private static int GetIndexClosestToAverage(IEnumerable<double> values, double average)
        {
+            // Average will be NaN iff all values are NaN.
+            // Return the first index in this case.
+            if (double.IsNaN(average))
+                return 0;
+
            int avgFoldIndex = -1;
            var smallestDistFromAvg = double.PositiveInfinity;
            for (var i = 0; i < values.Count(); i++)
            {
-                var distFromAvg = Math.Abs(values.ElementAt(i) - average);
-                if (distFromAvg < smallestDistFromAvg || smallestDistFromAvg == double.PositiveInfinity)
+                var value = values.ElementAt(i);
+                if (double.IsNaN(value))
+                    continue;
+                var distFromAvg = Math.Abs(value - average);
+                if (distFromAvg < smallestDistFromAvg)
                {
                    smallestDistFromAvg = distFromAvg;
                    avgFoldIndex = i;
6 changes: 6 additions & 0 deletions src/Microsoft.ML.AutoML/Utils/BestResultUtil.cs
@@ -41,6 +41,9 @@ public static RunDetail<TMetrics> GetBestRun<TMetrics>(IEnumerable<RunDetail<TMe
            if (!results.Any()) { return null; }
            var scores = results.Select(r => metricsAgent.GetScore(r.ValidationMetrics));
            var indexOfBestScore = GetIndexOfBestScore(scores, isMetricMaximizing);
+            // indexOfBestScore will be -1 if the optimization metric for all models is NaN.
+            // In this case, return the first model.
+            indexOfBestScore = indexOfBestScore != -1 ? indexOfBestScore : 0;
            return results.ElementAt(indexOfBestScore);
        }

@@ -51,6 +54,9 @@ public static CrossValidationRunDetail<TMetrics> GetBestRun<TMetrics>(IEnumerabl
            if (!results.Any()) { return null; }
            var scores = results.Select(r => r.Results.Average(x => metricsAgent.GetScore(x.ValidationMetrics)));
            var indexOfBestScore = GetIndexOfBestScore(scores, isMetricMaximizing);
+            // indexOfBestScore will be -1 if the optimization metric for all models is NaN.
+            // In this case, return the first model.
+            indexOfBestScore = indexOfBestScore != -1 ? indexOfBestScore : 0;
            return results.ElementAt(indexOfBestScore);
        }
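
GetIndexOfBestScore itself is not shown in this diff; a plausible shape consistent with the comments above (an assumption, not the actual implementation) would skip NaN scores and fall back to -1:

using System;
using System.Collections.Generic;

static class BestScoreSketch
{
    // Assumed behavior, inferred from the comments above: NaN scores never win a
    // comparison, so the method returns -1 when every score is NaN.
    public static int GetIndexOfBestScore(IEnumerable<double> scores, bool isMetricMaximizing)
    {
        var bestIndex = -1;
        var bestScore = double.NaN;
        var i = 0;
        foreach (var score in scores)
        {
            if (!double.IsNaN(score) &&
                (bestIndex == -1 || (isMetricMaximizing ? score > bestScore : score < bestScore)))
            {
                bestScore = score;
                bestIndex = i;
            }
            i++;
        }
        return bestIndex;
    }
}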

BinaryClassificationMetrics.cs

@@ -122,5 +122,12 @@ internal BinaryClassificationMetrics(double auc, double accuracy, double positiv
            F1Score = f1Score;
            AreaUnderPrecisionRecallCurve = auprc;
        }
+
+        internal BinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall,
+            double negativePrecision, double negativeRecall, double f1Score, double auprc, ConfusionMatrix confusionMatrix)
+            : this(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc)
+        {
+            ConfusionMatrix = confusionMatrix;
+        }
    }
}
MulticlassClassificationMetrics.cs

@@ -134,5 +134,12 @@ internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMa
            TopKAccuracy = topKAccuracy;
            PerClassLogLoss = perClassLogLoss.ToImmutableArray();
        }
+
+        internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
+            int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
+            : this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracy, perClassLogLoss)
+        {
+            ConfusionMatrix = confusionMatrix;
+        }
    }
}