diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/BinaryClassificationExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/BinaryClassificationExperiment.cs similarity index 100% rename from docs/samples/Microsoft.ML.AutoML.Samples/BinaryClassificationExperiment.cs rename to docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/BinaryClassificationExperiment.cs diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/MulticlassClassificationExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/MulticlassClassificationExperiment.cs similarity index 100% rename from docs/samples/Microsoft.ML.AutoML.Samples/MulticlassClassificationExperiment.cs rename to docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/MulticlassClassificationExperiment.cs diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/RankingExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RankingExperiment.cs similarity index 100% rename from docs/samples/Microsoft.ML.AutoML.Samples/RankingExperiment.cs rename to docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RankingExperiment.cs diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/RecommendationExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RecommendationExperiment.cs similarity index 100% rename from docs/samples/Microsoft.ML.AutoML.Samples/RecommendationExperiment.cs rename to docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RecommendationExperiment.cs diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RegressionExperiment.cs similarity index 100% rename from docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs rename to docs/samples/Microsoft.ML.AutoML.Samples/AutoFit/RegressionExperiment.cs diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/ParameterExample.cs b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/ParameterExample.cs new file mode 100644 index 0000000000..bac5a60e18 --- /dev/null +++ 
b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/ParameterExample.cs @@ -0,0 +1,27 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.SearchSpace; + +namespace Microsoft.ML.AutoML.Samples +{ + public static class ParameterExample + { + public static void Run() + { + // Parameter is essentially a wrapper class over JSON. + // Therefore it supports all JSON types, like integer, number, boolean, string, etc. + + // To create a parameter over an existing value, use Parameter.From + var intParam = Parameter.FromInt(10); + var doubleParam = Parameter.FromDouble(20); + var boolParam = Parameter.FromBool(false); + + // To cast a parameter to a specific type, use Parameter.AsType + // NOTE: Casting to a wrong type will trigger an ArgumentException. + var i = intParam.AsType(); // i == 10 + var d = doubleParam.AsType(); // d == 20 + var b = boolParam.AsType(); // b == false + } + } +} diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SearchSpaceExample.cs b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SearchSpaceExample.cs new file mode 100644 index 0000000000..0ca0e27dac --- /dev/null +++ b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SearchSpaceExample.cs @@ -0,0 +1,67 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Diagnostics; +using System.Text; +using System.Text.Json; +using Microsoft.ML.SearchSpace; +using Microsoft.ML.SearchSpace.Option; + +namespace Microsoft.ML.AutoML.Samples +{ + public static class SearchSpaceExample + { + public static void Run() + { + // The following code shows how to create a SearchSpace for MyParameter. + var myParameterSearchSpace = new SearchSpace(); + + // Equivalently, you can also create myParameterSearchSpace from scratch. 
+ var myParameterSearchSpace2 = new SearchSpace.SearchSpace(); + + // numeric options + myParameterSearchSpace2["IntOption"] = new UniformIntOption(min: -10, max: 10, logBase: false, defaultValue: 0); + myParameterSearchSpace2["SingleOption"] = new UniformSingleOption(min: 1, max: 10, logBase: true, defaultValue: 1); + myParameterSearchSpace2["DoubleOption"] = new UniformDoubleOption(min: -10, max: 10, logBase: false, defaultValue: 0); + + // choice options + myParameterSearchSpace2["BoolOption"] = new ChoiceOption(true, false); + myParameterSearchSpace2["StrOption"] = new ChoiceOption("a", "b", "c"); + + // nested options + var nestedSearchSpace = new SearchSpace.SearchSpace(); + nestedSearchSpace["IntOption"] = new UniformIntOption(min: -10, max: 10, logBase: false, defaultValue: 0); + myParameterSearchSpace2["Nest"] = nestedSearchSpace; + + // the two search spaces should be equal + Debug.Assert(myParameterSearchSpace.GetHashCode() == myParameterSearchSpace2.GetHashCode()); + } + + public class MyParameter + { + [Range((int)-10, 10, 0, false)] + public int IntOption { get; set; } + + [Range(1f, 10f, 1f, true)] + public float SingleOption { get; set; } + + [Range(-10, 10, false)] + public double DoubleOption { get; set; } + + [BooleanChoice] + public bool BoolOption { get; set; } + + [Choice("a", "b", "c")] + public string StrOption { get; set; } + + [NestOption] + public NestParameter Nest { get; set; } + } + + public class NestParameter + { + [Range((int)-10, 10, 0, false)] + public int IntOption { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SweepableLightGBMBinaryExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SweepableLightGBMBinaryExperiment.cs new file mode 100644 index 0000000000..77d7fd3288 --- /dev/null +++ b/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SweepableLightGBMBinaryExperiment.cs @@ -0,0 +1,171 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using 
System.Text; +using System.Threading.Tasks; +using Microsoft.ML.Data; +using Microsoft.ML.SearchSpace; + +namespace Microsoft.ML.AutoML.Samples +{ + public static class SweepableLightGBMBinaryExperiment + { + class LightGBMOption + { + [Range(4, 32768, init: 4, logBase: false)] + public int NumberOfLeaves { get; set; } = 4; + + [Range(4, 32768, init: 4, logBase: false)] + public int NumberOfTrees { get; set; } = 4; + } + + public static async Task RunAsync() + { + // This example shows how to use Sweepable API to run hyper-parameter optimization over + // LightGBM trainer with a customized search space. + + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. + var seed = 0; + var context = new MLContext(seed); + + // Create a list of training data points and convert it to IDataView. + var data = GenerateRandomBinaryClassificationDataPoints(100, seed); + var dataView = context.Data.LoadFromEnumerable(data); + + // Split the dataset into train and test sets with 10% of the data used for testing. + var trainTestSplit = context.Data.TrainTestSplit(dataView, testFraction: 0.1); + + // Define a customized search space for LightGBM + var lgbmSearchSpace = new SearchSpace(); + + // Define the sweepable LightGBM estimator. 
+ var lgbm = context.Auto().CreateSweepableEstimator((_context, option) => + { + return _context.BinaryClassification.Trainers.LightGbm( + "Label", + "Features", + numberOfLeaves: option.NumberOfLeaves, + numberOfIterations: option.NumberOfTrees); + }, lgbmSearchSpace); + + // Create sweepable pipeline + var pipeline = new EstimatorChain().Append(lgbm); + + // Create an AutoML experiment + var experiment = context.Auto().CreateExperiment(); + + // Redirect AutoML log to console + context.Log += (object o, LoggingEventArgs e) => + { + if (e.Source == nameof(AutoMLExperiment) && e.Kind > Runtime.ChannelMessageKind.Trace) + { + Console.WriteLine(e.RawMessage); + } + }; + + // Configure the experiment to optimize the "Accuracy" metric on the given dataset. + // This experiment will run hyper-parameter optimization on the given pipeline. + experiment.SetPipeline(pipeline) + .SetDataset(trainTestSplit.TrainSet, fold: 5) // use 5-fold cross validation to evaluate each trial + .SetBinaryClassificationMetric(BinaryClassificationMetric.Accuracy, "Label") + .SetMaxModelToExplore(100); // explore 100 trials + + // Start the AutoML experiment + var result = await experiment.RunAsync(); + + // Expected output samples during training. The pipeline will be unknown because it's created using + // a customized sweepable estimator; therefore AutoML doesn't know the exact type of the estimator. + // Update Running Trial - Id: 0 + // Update Completed Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown - Duration: 616 - Peak CPU: 0.00% - Peak Memory in MB: 35.54 + // Update Best Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown + + // Evaluate the best model on the test dataset. 
+ var bestModel = result.Model; + var eval = bestModel.Transform(trainTestSplit.TestSet); + var metrics = context.BinaryClassification.Evaluate(eval); + + PrintMetrics(metrics); + + // Expected output: + // Accuracy: 0.67 + // AUC: 0.75 + // F1 Score: 0.33 + // Negative Precision: 0.88 + // Negative Recall: 0.70 + // Positive Precision: 0.25 + // Positive Recall: 0.50 + + // TEST POSITIVE RATIO: 0.1667(2.0 / (2.0 + 10.0)) + // Confusion table + // ||====================== + // PREDICTED || positive | negative | Recall + // TRUTH ||====================== + // positive || 1 | 1 | 0.5000 + // negative || 3 | 7 | 0.7000 + // ||====================== + // Precision || 0.2500 | 0.8750 | + } + + private static IEnumerable GenerateRandomBinaryClassificationDataPoints(int count, + int seed = 0) + + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + var label = randomFloat() > 0.5f; + yield return new BinaryClassificationDataPoint + { + Label = label, + // Create random features that are correlated with the label. + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + + }; + } + } + + // Example with label and 50 feature values. A data set is a collection of + // such examples. + private class BinaryClassificationDataPoint + { + public bool Label { get; set; } + + [VectorType(50)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public bool Label { get; set; } + // Predicted label from the trainer. + public bool PredictedLabel { get; set; } + } + + // Pretty-print BinaryClassificationMetrics objects. 
+ private static void PrintMetrics(BinaryClassificationMetrics metrics) + { + Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); + Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); + Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); + } + } +} diff --git a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs index a965fcf832..f60d4af558 100644 --- a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs +++ b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs @@ -291,6 +291,13 @@ public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, b /// /// Create a sweepable estimator with a custom factory and search space. /// + /// + /// + /// + /// + /// public SweepableEstimator CreateSweepableEstimator(Func> factory, SearchSpace ss = null) where T : class, new() { diff --git a/src/Microsoft.ML.SearchSpace/Parameter.cs b/src/Microsoft.ML.SearchSpace/Parameter.cs index 58eeff0b6d..87e85a803f 100644 --- a/src/Microsoft.ML.SearchSpace/Parameter.cs +++ b/src/Microsoft.ML.SearchSpace/Parameter.cs @@ -52,6 +52,13 @@ public enum ParameterType /// /// is used to save sweeping result from tuner and is used to restore mlnet pipeline from sweepable pipeline. 
/// + /// + /// + /// + /// + /// [JsonConverter(typeof(ParameterConverter))] public sealed class Parameter : IDictionary, IEquatable, IEqualityComparer { diff --git a/src/Microsoft.ML.SearchSpace/SearchSpace.cs b/src/Microsoft.ML.SearchSpace/SearchSpace.cs index 84c2da890d..ea29cc06a3 100644 --- a/src/Microsoft.ML.SearchSpace/SearchSpace.cs +++ b/src/Microsoft.ML.SearchSpace/SearchSpace.cs @@ -16,6 +16,13 @@ namespace Microsoft.ML.SearchSpace /// /// This class is used to represent a set of , which can be either one of , or another nested search space. /// + /// + /// + /// + /// + /// [JsonConverter(typeof(SearchSpaceConverter))] public class SearchSpace : OptionBase, IDictionary { @@ -373,6 +380,13 @@ private Parameter Update(Parameter left, Parameter right) } /// + /// + /// + /// + /// + /// public sealed class SearchSpace : SearchSpace where T : class, new() {