diff --git a/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticAutoFit.cs b/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticAutoFit.cs
new file mode 100644
index 0000000000..afaf3e5b13
--- /dev/null
+++ b/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticAutoFit.cs
@@ -0,0 +1,144 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Microsoft.ML.AutoML.Test
+{
+    public enum TaskType
+    {
+        Classification = 1,
+        Regression
+    }
+
+    /// <summary>
+    /// Makes AutoFit and Score calls uniform across task types.
+    /// </summary>
+    internal class TaskAgnosticAutoFit
+    {
+        private TaskType taskType;
+        private MLContext context;
+
+        internal interface IUniversalProgressHandler : IProgress<RunDetail<MulticlassClassificationMetrics>>, IProgress<RunDetail<RegressionMetrics>>
+        {
+        }
+
+        internal TaskAgnosticAutoFit(TaskType taskType, MLContext context)
+        {
+            this.taskType = taskType;
+            this.context = context;
+        }
+
+        internal IEnumerable<TaskAgnosticIterationResult> AutoFit(
+            IDataView trainData,
+            string label,
+            int maxModels,
+            uint maxExperimentTimeInSeconds,
+            IDataView validationData = null,
+            IEstimator<ITransformer> preFeaturizers = null,
+            IEnumerable<(string, ColumnPurpose)> columnPurposes = null,
+            IUniversalProgressHandler progressHandler = null)
+        {
+            var columnInformation = new ColumnInformation() { LabelColumnName = label };
+
+            switch (this.taskType)
+            {
+                case TaskType.Classification:
+
+                    var mcs = new MulticlassExperimentSettings
+                    {
+                        OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy,
+
+                        MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds,
+                        MaxModels = maxModels
+                    };
+
+                    var classificationResult = this.context.Auto()
+                        .CreateMulticlassClassificationExperiment(mcs)
+                        .Execute(
+                            trainData,
+                            validationData,
+                            columnInformation,
+                            progressHandler: progressHandler);
+
+                    var iterationResults = classificationResult.RunDetails.Select(i => new TaskAgnosticIterationResult(i)).ToList();
+
+                    return iterationResults;
+
+                case TaskType.Regression:
+
+                    var rs = new RegressionExperimentSettings
+                    {
+                        OptimizingMetric = RegressionMetric.RSquared,
+
+                        MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds,
+                        MaxModels = maxModels
+                    };
+
+                    var regressionResult = this.context.Auto()
+                        .CreateRegressionExperiment(rs)
+                        .Execute(
+                            trainData,
+                            validationData,
+                            columnInformation,
+                            progressHandler: progressHandler);
+
+                    iterationResults = regressionResult.RunDetails.Select(i => new TaskAgnosticIterationResult(i)).ToList();
+
+                    return iterationResults;
+
+                default:
+                    throw new ArgumentException($"Unknown task type {this.taskType}.", "TaskType");
+            }
+        }
+
+        internal struct ScoreResult
+        {
+            public IDataView ScoredTestData;
+            public double PrimaryMetricResult;
+            public Dictionary<string, double> Metrics;
+        }
+
+        internal ScoreResult Score(
+            IDataView testData,
+            ITransformer model,
+            string label)
+        {
+            var result = new ScoreResult();
+
+            result.ScoredTestData = model.Transform(testData);
+
+            switch (this.taskType)
+            {
+                case TaskType.Classification:
+
+                    var classificationMetrics = context.MulticlassClassification.Evaluate(result.ScoredTestData, labelColumnName: label);
+
+                    result.PrimaryMetricResult = classificationMetrics.MicroAccuracy; // TODO: don't hardcode metric
+                    result.Metrics = TaskAgnosticIterationResult.MetricValuesToDictionary(classificationMetrics);
+
+                    break;
+
+                case TaskType.Regression:
+
+                    var regressionMetrics = context.Regression.Evaluate(result.ScoredTestData, labelColumnName: label);
+
+                    result.PrimaryMetricResult = regressionMetrics.RSquared; // TODO: don't hardcode metric
+                    result.Metrics = TaskAgnosticIterationResult.MetricValuesToDictionary(regressionMetrics);
+
+                    break;
+
+                default:
+                    throw new ArgumentException($"Unknown task type {this.taskType}.", "TaskType");
+            }
+
+            return result;
+        }
+    }
+}
+
diff --git a/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticIterationResult.cs b/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticIterationResult.cs
new file mode 100644
index 0000000000..462872dc9f
--- /dev/null
+++ b/test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticIterationResult.cs
@@ -0,0 +1,87 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Microsoft.ML.AutoML.Test
+{
+    internal class TaskAgnosticIterationResult
+    {
+        internal double PrimaryMetricValue;
+
+        internal Dictionary<string, double> MetricValues = new Dictionary<string, double>();
+
+        internal readonly ITransformer Model;
+        internal readonly Exception Exception;
+        internal string TrainerName;
+        internal double RuntimeInSeconds;
+        internal IEstimator<ITransformer> Estimator;
+        internal Pipeline Pipeline;
+        internal int PipelineInferenceTimeInSeconds;
+
+        private string primaryMetricName;
+
+        private TaskAgnosticIterationResult(RunDetail baseRunDetail, object validationMetrics, string primaryMetricName)
+        {
+            this.TrainerName = baseRunDetail.TrainerName;
+            this.Estimator = baseRunDetail.Estimator;
+            this.Pipeline = baseRunDetail.Pipeline;
+
+            this.PipelineInferenceTimeInSeconds = (int)baseRunDetail.PipelineInferenceTimeInSeconds;
+            this.RuntimeInSeconds = (int)baseRunDetail.RuntimeInSeconds;
+
+            this.primaryMetricName = primaryMetricName;
+            this.PrimaryMetricValue = -1; // default value in case of exception. TODO: won't work for minimizing metrics, use nullable?
+
+            if (validationMetrics == null)
+            {
+                return;
+            }
+
+            this.MetricValues = MetricValuesToDictionary(validationMetrics);
+
+            this.PrimaryMetricValue = this.MetricValues[this.primaryMetricName];
+        }
+
+        public TaskAgnosticIterationResult(RunDetail<RegressionMetrics> runDetail, string primaryMetricName = "RSquared")
+            : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
+        {
+            if (runDetail.Exception == null)
+            {
+                this.Model = runDetail.Model;
+            }
+
+            this.Exception = runDetail.Exception;
+        }
+
+        public TaskAgnosticIterationResult(RunDetail<MulticlassClassificationMetrics> runDetail, string primaryMetricName = "MicroAccuracy")
+            : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
+        {
+            if (runDetail.Exception == null)
+            {
+                this.Model = runDetail.Model;
+            }
+
+            this.Exception = runDetail.Exception;
+        }
+
+        public static Dictionary<string, double> MetricValuesToDictionary<T>(T metric)
+        {
+            var supportedTypes = new[] { typeof(MulticlassClassificationMetrics), typeof(RegressionMetrics) };
+
+            if (!supportedTypes.Contains(metric.GetType()))
+            {
+                throw new ArgumentException($"Unsupported metric type {typeof(T).Name}.");
+            }
+
+            var propertiesToReport = metric.GetType().GetProperties().Where(p => p.PropertyType == typeof(double));
+
+            return propertiesToReport.ToDictionary(p => p.Name, p => (double)metric.GetType().GetProperty(p.Name).GetValue(metric));
+        }
+    }
+}
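For context, a minimal sketch of how a test might drive these two helpers end to end. It is not part of the diff: the housing.csv file, the HousingRow schema, the column names, and the UsageSketch wrapper are hypothetical, and the code assumes it compiles inside the same test assembly, since both helpers are internal.

// Hypothetical usage sketch only -- not part of this diff.
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML.Test;

internal static class TaskAgnosticAutoFitUsageSketch
{
    // Placeholder input schema for a hypothetical housing.csv.
    private sealed class HousingRow
    {
        [LoadColumn(0)] public float Size { get; set; }
        [LoadColumn(1)] public float Label { get; set; }
    }

    internal static void Run()
    {
        var context = new MLContext(seed: 1);

        // Load a hypothetical CSV and hold out 20% of the rows for scoring.
        var data = context.Data.LoadFromTextFile<HousingRow>("housing.csv", hasHeader: true, separatorChar: ',');
        var split = context.Data.TrainTestSplit(data, testFraction: 0.2);

        // One entry point regardless of task type; here a short regression experiment.
        var autoFit = new TaskAgnosticAutoFit(TaskType.Regression, context);
        var iterations = autoFit
            .AutoFit(split.TrainSet, "Label", maxModels: 5, maxExperimentTimeInSeconds: 30)
            .Where(i => i.Exception == null)
            .ToList();

        // PrimaryMetricValue is RSquared for regression, MicroAccuracy for classification.
        var best = iterations.OrderByDescending(i => i.PrimaryMetricValue).First();

        // Score the hold-out set with the best model through the task-agnostic Score call.
        var score = autoFit.Score(split.TestSet, best.Model, "Label");
        Console.WriteLine($"{best.TrainerName}: {score.PrimaryMetricResult:F4}");
    }
}

The classification path is analogous: construct the helper with TaskType.Classification and read MicroAccuracy from ScoreResult.PrimaryMetricResult, with the full metric set available in ScoreResult.Metrics.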