diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs index bcb132a815..1cb2c7c466 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs @@ -8,37 +8,46 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastForestRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. 
- var pipeline = mlContext.Regression.Trainers.FastForest(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.FastForest( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -60,7 +69,8 @@ public static void Example() // RSquared: 0.96 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -70,12 +80,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -95,10 +107,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.tt index fbc1d6253d..92b968f921 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.tt @@ -1,12 +1,16 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. 
"; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastForestRegression"; string ExtraUsing = null; -string Trainer = @"FastForest(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"FastForest( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; + string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs index 780545f68a..5482db46be 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastForestWithOptionsRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -38,21 +41,26 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.FastForest(options); + var pipeline = + mlContext.Regression.Trainers.FastForest(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -74,7 +82,8 @@ public static void Example() // RSquared: 0.95 (closer to 1 is better. 
The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -84,12 +93,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -109,10 +120,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.tt index 00b3740ddb..b32a5920b0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.tt +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.tt @@ -1,8 +1,9 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. "; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastForestWithOptionsRegression"; string ExtraUsing = "using Microsoft.ML.Trainers.FastTree;"; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs index 7a46027bc6..24f4626727 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs @@ -8,37 +8,46 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.FastTree(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.FastTree( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -60,7 +69,8 @@ public static void Example() // RSquared: 0.99 (closer to 1 is better. 
The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -70,12 +80,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -95,10 +107,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.tt index 5c06288114..b23398c9fd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.tt @@ -1,12 +1,15 @@ <#@ 
include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. "; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastTreeRegression"; string ExtraUsing = null; -string Trainer = @"FastTree(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"FastTree( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs index 22e6ff2e01..497919cdcb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs @@ -8,37 +8,46 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeTweedieRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.FastTreeTweedie(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.FastTreeTweedie( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. 
foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -60,7 +69,8 @@ public static void Example() // RSquared: 0.96 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -70,12 +80,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -95,10 +107,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.tt index 4ecbbf1134..9befcb25e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.tt @@ -1,12 +1,15 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. 
"; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastTreeTweedieRegression"; string ExtraUsing = null; -string Trainer = @"FastTreeTweedie(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"FastTreeTweedie( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs index a0f8230954..eb4de40868 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeTweedieWithOptionsRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -30,7 +33,9 @@ public static void Example() LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), // Use L2Norm for early stopping. - EarlyStoppingMetric = Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + EarlyStoppingMetric = + Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Create a simpler model by penalizing usage of new features. FeatureFirstUsePenalty = 0.1, // Reduce the number of trees to 50. @@ -38,21 +43,26 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.FastTreeTweedie(options); + var pipeline = + mlContext.Regression.Trainers.FastTreeTweedie(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. 
+ // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -74,7 +84,8 @@ public static void Example() // RSquared: 0.98 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -84,12 +95,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -109,10 +122,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.tt index 98d4d5e6a5..1c37bbe506 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.tt @@ -1,8 +1,9 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. "; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastTreeTweedieWithOptionsRegression"; string ExtraUsing = "using Microsoft.ML.Trainers.FastTree;"; @@ -12,7 +13,9 @@ string TrainerOptions = @"FastTreeTweedieTrainer.Options LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), // Use L2Norm for early stopping. 
- EarlyStoppingMetric = Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + EarlyStoppingMetric = + Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Create a simpler model by penalizing usage of new features. FeatureFirstUsePenalty = 0.1, // Reduce the number of trees to 50. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs index 1ed25cff1b..235a298d82 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeWithOptionsRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -29,9 +32,11 @@ public static void Example() { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Use L2-norm for early stopping. If the gradient's L2-norm is smaller than - // an auto-computed value, training process will stop. - EarlyStoppingMetric = Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Use L2-norm for early stopping. If the gradient's L2-norm is + // smaller than an auto-computed value, training process will stop. + EarlyStoppingMetric = + Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Create a simpler model by penalizing usage of new features. FeatureFirstUsePenalty = 0.1, // Reduce the number of trees to 50. @@ -39,21 +44,26 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.FastTree(options); + var pipeline = + mlContext.Regression.Trainers.FastTree(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. 
foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -75,7 +85,8 @@ public static void Example() // RSquared: 0.99 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -85,12 +96,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -110,10 +123,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.tt index df768cf53d..cb80ea4566 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.tt @@ -1,8 +1,9 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. "; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="FastTreeWithOptionsRegression"; string ExtraUsing = "using Microsoft.ML.Trainers.FastTree;"; @@ -11,9 +12,11 @@ string TrainerOptions = @"FastTreeRegressionTrainer.Options { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Use L2-norm for early stopping. If the gradient's L2-norm is smaller than - // an auto-computed value, training process will stop. - EarlyStoppingMetric = Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Use L2-norm for early stopping. 
If the gradient's L2-norm is + // smaller than an auto-computed value, training process will stop. + EarlyStoppingMetric = + Microsoft.ML.Trainers.FastTree.EarlyStoppingMetric.L2Norm, + // Create a simpler model by penalizing usage of new features. FeatureFirstUsePenalty = 0.1, // Reduce the number of trees to 50. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs index f58450d9f8..f1c568ce9c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs @@ -8,37 +8,46 @@ namespace Samples.Dynamic.Trainers.Regression { public static class Gam { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. 
- var pipeline = mlContext.Regression.Trainers.Gam(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.Gam( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -60,7 +69,8 @@ public static void Example() // RSquared: 0.99 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -70,12 +80,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -95,10 +107,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.tt index e928c0bcc2..06a0d9bf0b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.tt @@ -1,12 +1,16 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. 
"; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="Gam"; string ExtraUsing = null; -string Trainer = @"Gam(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"Gam( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; + string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamAdvanced.cs index 419ee531ee..98485d0697 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamAdvanced.cs @@ -7,11 +7,13 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class GamAdvanced { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, // as a catalog of available operations and as the source of randomness. var mlContext = new MLContext(); @@ -27,30 +29,36 @@ public static void Example() var validSet = dataSets.TestSet; // Create a GAM trainer. - // Use a small number of bins for this example. The setting below means for each feature, - // we divide its range into 16 discrete regions for the training process. 
Note that these - // regions are not evenly spaced, and that the final model may contain fewer bins, as - // neighboring bins with identical values will be combined. In general, we recommend using - // at least the default number of bins, as a small number of bins limits the capacity of - // the model. - var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16); - - // Fit the model using both of training and validation sets. GAM can use a technique called - // pruning to tune the model to the validation set after training to improve generalization. + // Use a small number of bins for this example. The setting below means + // for each feature, we divide its range into 16 discrete regions for + // the training process. Note that these regions are not evenly spaced, + // and that the final model may contain fewer bins, as neighboring bins + // with identical values will be combined. In general, we recommend + // using at least the default number of bins, as a small number of bins + // limits the capacity of the model. + var trainer = mlContext.BinaryClassification.Trainers.Gam( + maximumBinCountPerFeature: 16); + + // Fit the model using both the training and validation sets. GAM can use + // a technique called pruning to tune the model to the validation set + // after training to improve generalization. + var model = trainer.Fit(trainSet, validSet); // Extract the model parameters. var gam = model.Model.SubModel; - // Now we can inspect the parameters of the Generalized Additive Model to understand the fit - // and potentially learn about our dataset. - // First, we will look at the bias; the bias represents the average prediction for the training data. + // Now we can inspect the parameters of the Generalized Additive Model + // to understand the fit and potentially learn about our dataset. + // First, we will look at the bias; the bias represents the average + // prediction for the training data.
Console.WriteLine($"Average prediction: {gam.Bias:0.00}"); - // Now look at the shape functions that the model has learned. Similar to a linear model, we have - // one response per feature, and they are independent. Unlike a linear model, this response is a - // generic function instead of a line. Because we have included a bias term, each feature response - // represents the deviation from the average prediction as a function of the feature value. + // Now look at the shape functions that the model has learned. Similar + // to a linear model, we have one response per feature, and they are + // independent. Unlike a linear model, this response is a generic + // function instead of a line. Because we have included a bias term, + // each feature response represents the deviation from the average + // prediction as a function of the feature value. for (int i = 0; i < gam.NumberOfShapeFunctions; i++) { // Break a line. @@ -62,11 +70,13 @@ public static void Example() // Get the bin effects; these are the function values for each bin. var binEffects = gam.GetBinEffects(i); - // Now, write the function to the console. The function is a set of bins, and the corresponding - // function values. You can think of GAMs as building a bar-chart or lookup table for each feature. + // Now, write the function to the console. The function is a set of + // bins, and the corresponding function values. You can think of + // GAMs as building a bar-chart or lookup table for each feature. Console.WriteLine($"Feature{i}"); for (int j = 0; j < binUpperBounds.Count; j++) - Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); + Console.WriteLine( + $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); } // Expected output: @@ -91,18 +101,23 @@ public static void Example() // x < 0.31 => -0.138 // x < ∞ => -0.188 - // Let's consider this output. To score a given example, we look up the first bin where the inequality - // is satisfied for the feature value. 
We can look at the whole function to get a sense for how the - // model responds to the variable on a global level. - // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average - // expected output over the training set. Very few bins are used to model the second feature because the GAM model - // discards unchanged bins to create smaller models. - // One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be - // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use - // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is - // real or just sampling noise. See for example: - // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model - // Distillation." arXiv:1710.06169." + // Let's consider this output. To score a given example, we look up the + // first bin where the inequality is satisfied for the feature value. + // We can look at the whole function to get a sense for how the model + // responds to the variable on a global level. The model can be seen to + // reconstruct the parabolic and step-wise function, shifted with + // respect to the average expected output over the training set. Very + // few bins are used to model the second feature because the GAM model + // discards unchanged bins to create smaller models. One last thing to + // notice is that these feature functions can be noisy. While we know + // that Feature1 should be symmetric, this is not captured in the model. + // This is due to noise in the data. Common practice is to use + // resampling methods to estimate a confidence interval at each bin. + // This will help to determine if the effect is real or just sampling + // noise. See for example: Tan, Caruana, Hooker, and Lou. 
+ // "Distill-and-Compare: Auditing Black-Box Models Using Transparent + // Model Distillation." + // arXiv:1710.06169." } private class Data @@ -114,13 +129,16 @@ private class Data } /// - /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0, - /// while Feature2 is a simple piecewise function. + /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. + /// Feature1 is a parabola centered around 0, while Feature2 is a simple + /// piecewise function. /// /// The number of examples to generate. - /// The seed for the random number generator used to produce data. + /// The seed for the random number generator used to + /// produce data. /// - private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1) + private static IEnumerable GenerateData(int numExamples = 25000, + int seed = 1) { var rng = new Random(seed); float centeredFloat() => (float)(rng.NextDouble() - 0.5); @@ -132,7 +150,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed Features = new float[2] { centeredFloat(), centeredFloat() } }; // Compute the label from the shape functions and add noise. 
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; + data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise( + data.Features[1]) + centeredFloat()) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs index 004f90fb20..1b69ae8764 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Regression { public static class GamWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. 
@@ -36,21 +39,26 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.Gam(options); + var pipeline = + mlContext.Regression.Trainers.Gam(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -72,7 +80,8 @@ public static void Example() // RSquared: 0.98 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -82,12 +91,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -107,10 +118,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.tt index cd45b0442b..c15a436f13 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.tt @@ -1,8 +1,9 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. 
"; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ClassName="GamWithOptions"; string ExtraUsing = "using Microsoft.ML.Trainers.FastTree;"; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptionsAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptionsAdvanced.cs index 167f7aec2e..1f5b546239 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptionsAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptionsAdvanced.cs @@ -8,12 +8,14 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class GamWithOptionsAdvanced { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree found at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(); // Create the dataset. @@ -28,14 +30,15 @@ public static void Example() var validSet = dataSets.TestSet; // Create a GAM trainer. - // Use a small number of bins for this example. The setting below means for each feature, - // we divide its range into 16 discrete regions for the training process. Note that these - // regions are not evenly spaced, and that the final model may contain fewer bins, as - // neighboring bins with identical values will be combined. 
In general, we recommend using - // at least the default number of bins, as a small number of bins limits the capacity of - // the model. - // Also, set the learning rate to half the default to slow down the gradient descent, and - // double the number of iterations to compensate. + // Use a small number of bins for this example. The setting below means + // for each feature, we divide its range into 16 discrete regions for + // the training process. Note that these regions are not evenly spaced, + // and that the final model may contain fewer bins, as neighboring bins + // with identical values will be combined. In general, we recommend + // using at least the default number of bins, as a small number of bins + // limits the capacity of the model. Also, set the learning rate to half + // the default to slow down the gradient descent, and double the number + // of iterations to compensate. var trainer = mlContext.BinaryClassification.Trainers.Gam( new GamBinaryTrainer.Options { @@ -44,22 +47,26 @@ public static void Example() LearningRate = 0.001 }); - // Fit the model using both of training and validation sets. GAM can use a technique called - // pruning to tune the model to the validation set after training to improve generalization. + // Fit the model using both the training and validation sets. GAM can use + // a technique called pruning to tune the model to the validation set + // after training to improve generalization. + var model = trainer.Fit(trainSet, validSet); // Extract the model parameters. var gam = model.Model.SubModel; - // Now we can inspect the parameters of the Generalized Additive Model to understand the fit - // and potentially learn about our dataset.
+ // First, we will look at the bias; the bias represents the average + // prediction for the training data. Console.WriteLine($"Average prediction: {gam.Bias:0.00}"); - // Now look at the shape functions that the model has learned. Similar to a linear model, we have - // one response per feature, and they are independent. Unlike a linear model, this response is a - // generic function instead of a line. Because we have included a bias term, each feature response - // represents the deviation from the average prediction as a function of the feature value. + // Now look at the shape functions that the model has learned. Similar + // to a linear model, we have one response per feature, and they are + // independent. Unlike a linear model, this response is a generic + // function instead of a line. Because we have included a bias term, + // each feature response represents the deviation from the average + // prediction as a function of the feature value. for (int i = 0; i < gam.NumberOfShapeFunctions; i++) { // Break a line. @@ -71,11 +78,13 @@ public static void Example() // Get the bin effects; these are the function values for each bin. var binEffects = gam.GetBinEffects(i); - // Now, write the function to the console. The function is a set of bins, and the corresponding - // function values. You can think of GAMs as building a bar-chart or lookup table for each feature. + // Now, write the function to the console. The function is a set of + // bins, and the corresponding function values. You can think of + // GAMs as building a bar-chart or lookup table for each feature. Console.WriteLine($"Feature{i}"); for (int j = 0; j < binUpperBounds.Count; j++) - Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); + Console.WriteLine( + $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); } // Expected output: @@ -100,18 +109,23 @@ public static void Example() // x < 0.31 => -0.138 // x < ∞ => -0.188 - // Let's consider this output. 
To score a given example, we look up the first bin where the inequality - // is satisfied for the feature value. We can look at the whole function to get a sense for how the - // model responds to the variable on a global level. - // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average - // expected output over the training set. Very few bins are used to model the second feature because the GAM model - // discards unchanged bins to create smaller models. - // One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be - // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use - // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is - // real or just sampling noise. See for example: - // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model - // Distillation." arXiv:1710.06169." + // Let's consider this output. To score a given example, we look up the + // first bin where the inequality is satisfied for the feature value. + // We can look at the whole function to get a sense for how the model + // responds to the variable on a global level. The model can be seen to + // reconstruct the parabolic and step-wise function, shifted with + // respect to the average expected output over the training set. Very + // few bins are used to model the second feature because the GAM model + // discards unchanged bins to create smaller models. One last thing to + // notice is that these feature functions can be noisy. While we know + // that Feature1 should be symmetric, this is not captured in the model. + // This is due to noise in the data. Common practice is to use + // resampling methods to estimate a confidence interval at each bin. 
+ // This will help to determine if the effect is real or just sampling + // noise. See for example: Tan, Caruana, Hooker, and Lou. + // "Distill-and-Compare: Auditing Black-Box Models Using Transparent + // Model Distillation." + // arXiv:1710.06169." } private class Data @@ -123,13 +137,16 @@ private class Data } /// - /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0, - /// while Feature2 is a simple piecewise function. + /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. + /// Feature1 is a parabola centered around 0, while Feature2 is a simple + /// piecewise function. /// /// The number of examples to generate. - /// The seed for the random number generator used to produce data. + /// The seed for the random number generator used to + /// produce data. /// - private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1) + private static IEnumerable GenerateData(int numExamples = 25000, + int seed = 1) { var rng = new Random(seed); float centeredFloat() => (float)(rng.NextDouble() - 0.5); @@ -141,7 +158,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed Features = new float[2] { centeredFloat(), centeredFloat() } }; // Compute the label from the shape functions and add noise. 
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; + data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise( + data.Features[1]) + centeredFloat()) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.cs index 854cddc537..bf6978f300 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.cs @@ -10,33 +10,42 @@ public static class LbfgsPoissonRegression { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers. 
+ LbfgsPoissonRegression( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -58,7 +67,8 @@ public static void Example() // RSquared: 0.93 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -68,12 +78,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. 
+ // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -93,10 +105,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.tt index 08d6d22969..2c410cf954 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegression.tt @@ -4,7 +4,10 @@ string ClassHeader = null; string ClassName="LbfgsPoissonRegression"; string ExtraUsing = null; -string Trainer = "LbfgsPoissonRegression(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @" + LbfgsPoissonRegression( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.cs index b649699f9c..3c5a7da604 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.cs @@ -11,15 +11,17 @@ public static class LbfgsPoissonRegressionWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -27,30 +29,37 @@ public static void Example() { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Reduce optimization tolerance to speed up training at the cost of accuracy. + // Reduce optimization tolerance to speed up training at the cost of + // accuracy. OptimizationTolerance = 1e-4f, - // Decrease history size to speed up training at the cost of accuracy. + // Decrease history size to speed up training at the cost of + // accuracy. HistorySize = 30, // Specify scale for initial weights. 
InitialWeightsDiameter = 0.2f }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression(options); + var pipeline = + mlContext.Regression.Trainers.LbfgsPoissonRegression(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -72,7 +81,8 @@ public static void Example() // RSquared: 0.89 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -82,12 +92,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -107,10 +119,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.tt index f1513d9868..bbe4dbaefa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LbfgsPoissonRegressionWithOptions.tt @@ -9,9 +9,11 @@ string TrainerOptions = @"LbfgsPoissonRegressionTrainer.Options { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Reduce optimization tolerance to speed up training at the cost of accuracy. 
+ // Reduce optimization tolerance to speed up training at the cost of + // accuracy. OptimizationTolerance = 1e-4f, - // Decrease history size to speed up training at the cost of accuracy. + // Decrease history size to speed up training at the cost of + // accuracy. HistorySize = 30, // Specify scale for initial weights. InitialWeightsDiameter = 0.2f diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs index 4579280109..4502682f26 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs @@ -8,37 +8,47 @@ namespace Samples.Dynamic.Trainers.Regression { public static class LightGbm { - // This example requires installation of additional NuGet package - // Microsoft.ML.LightGBM. + // This example requires installation of additional NuGet + // package for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.LightGbm(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers. + LightGbm( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -60,7 +70,8 @@ public static void Example() // RSquared: 0.89 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -70,12 +81,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -95,10 +108,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.tt index cb7f481342..c3af40c187 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.tt @@ -1,12 +1,16 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.LightGBM. 
"; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; string ClassName="LightGbm"; string ExtraUsing = null; -string Trainer = @"LightGbm(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @" + LightGbm( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmAdvanced.cs index 363d746305..feba7ca9d3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmAdvanced.cs @@ -7,15 +7,18 @@ namespace Samples.Dynamic.Trainers.Regression { class LightGbmAdvanced { - // This example requires installation of additional nuget package Microsoft.ML.LightGBM. + // This example requires installation of additional NuGet package + // for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Download and load the housing dataset into an IDataView. 
- var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); + var dataView = Microsoft.ML.SamplesUtils.DatasetUtils + .LoadHousingRegressionDataset(mlContext); //////////////////// Data Preview //////////////////// /// Only 6 columns are displayed here. @@ -33,26 +36,32 @@ public static void Example() .Select(column => column.Name) // Get the column names .Where(name => name != labelName) // Drop the Label .ToArray(); - var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Regression.Trainers.LightGbm( - labelColumnName: labelName, - numberOfLeaves: 4, - minimumExampleCountPerLeaf: 6, - learningRate: 0.001)); + var pipeline = mlContext.Transforms.Concatenate("Features", + featureNames) + .Append(mlContext.Regression.Trainers.LightGbm( + labelColumnName: labelName, + numberOfLeaves: 4, + minimumExampleCountPerLeaf: 6, + learningRate: 0.001)); // Fit this pipeline to the training data. var model = pipeline.Fit(split.TrainSet); - // Get the feature importance based on the information gain used during training. + // Get the feature importance based on the information gain used during + // training. VBuffer weights = default; model.LastTransformer.Model.GetFeatureWeights(ref weights); var weightsValues = weights.DenseValues().ToArray(); - Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 - Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 + Console.WriteLine($"weight 0 - {weightsValues[0]}"); + // CrimesPerCapita (weight 0) = 0.1898361 + Console.WriteLine($"weight 5 - {weightsValues[5]}"); + // RoomsPerDwelling (weight 5) = 1 // Evaluate how the model is doing on the test data. 
var dataWithPredictions = model.Transform(split.TestSet); - var metrics = mlContext.Regression.Evaluate(dataWithPredictions, labelColumnName: labelName); + var metrics = mlContext.Regression.Evaluate( + dataWithPredictions, + labelColumnName: labelName); PrintMetrics(metrics); // Expected output @@ -63,12 +72,14 @@ public static void Example() // RSquared: 0.08 } - public static void PrintMetrics(RegressionMetrics metrics) + private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs index 24caa23360..fb9b783631 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Regression { public static class LightGbmWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.LightGBM. + // This example requires installation of additional NuGet + // package for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -34,7 +37,8 @@ public static void Example() // Each leaf contains at least this number of training data points. MinimumExampleCountPerLeaf = 6, // The step size per update. Using a large value might reduce the - // training time but also increase the algorithm's numerical stability. + // training time but also increase the algorithm's numerical + // stability. LearningRate = 0.001, Booster = new Microsoft.ML.Trainers.LightGbm.GossBooster.Options() { @@ -44,21 +48,26 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.LightGbm(options); + var pipeline = + mlContext.Regression.Trainers.LightGbm(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. 
+ var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -80,7 +89,8 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -90,12 +100,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -115,10 +127,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.tt index 22b6522baf..b1105186db 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.tt @@ -1,8 +1,9 @@ <#@ include file="RegressionSamplesTemplate.ttinclude"#> <#+ -string ClassHeader = @"// This example requires installation of additional NuGet package - // Microsoft.ML.LightGBM. "; +string ClassHeader = @"// This example requires installation of additional NuGet + // package for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; string ClassName="LightGbmWithOptions"; string ExtraUsing = "using Microsoft.ML.Trainers.LightGbm;"; @@ -16,7 +17,8 @@ string TrainerOptions = @"LightGbmRegressionTrainer.Options // Each leaf contains at least this number of training data points. MinimumExampleCountPerLeaf = 6, // The step size per update. Using a large value might reduce the - // training time but also increase the algorithm's numerical stability. + // training time but also increase the algorithm's numerical + // stability. 
LearningRate = 0.001, Booster = new Microsoft.ML.Trainers.LightGbm.GossBooster.Options() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptionsAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptionsAdvanced.cs index acc48bbdf7..ea98c44670 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptionsAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptionsAdvanced.cs @@ -8,11 +8,13 @@ namespace Samples.Dynamic.Trainers.Regression { class LightGbmWithOptionsAdvanced { - // This example requires installation of additional nuget package Microsoft.ML.LightGBM. + // This example requires installation of additional NuGet package + // for Microsoft.ML.LightGBM + // at https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Download and load the housing dataset into an IDataView. 
@@ -35,33 +37,40 @@ public static void Example() .Select(column => column.Name) // Get the column names .Where(name => name != labelName) // Drop the Label .ToArray(); - var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Regression.Trainers.LightGbm(new LightGbmRegressionTrainer.Options - { - LabelColumnName = labelName, - NumberOfLeaves = 4, - MinimumExampleCountPerLeaf = 6, - LearningRate = 0.001, - Booster = new GossBooster.Options() - { - TopRate = 0.3, - OtherRate = 0.2 - } - })); + var pipeline = mlContext.Transforms.Concatenate( + "Features", featureNames) + .Append(mlContext.Regression.Trainers.LightGbm( + new LightGbmRegressionTrainer.Options + { + LabelColumnName = labelName, + NumberOfLeaves = 4, + MinimumExampleCountPerLeaf = 6, + LearningRate = 0.001, + Booster = new GossBooster.Options() + { + TopRate = 0.3, + OtherRate = 0.2 + } + })); // Fit this pipeline to the training data. var model = pipeline.Fit(split.TrainSet); - // Get the feature importance based on the information gain used during training. + // Get the feature importance based on the information gain used during + // training. VBuffer weights = default; model.LastTransformer.Model.GetFeatureWeights(ref weights); var weightsValues = weights.DenseValues().ToArray(); - Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 - Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 + Console.WriteLine($"weight 0 - {weightsValues[0]}"); + // CrimesPerCapita (weight 0) = 0.1898361 + Console.WriteLine($"weight 5 - {weightsValues[5]}"); + // RoomsPerDwelling (weight 5) = 1 // Evaluate how the model is doing on the test data. 
var dataWithPredictions = model.Transform(split.TestSet); - var metrics = mlContext.Regression.Evaluate(dataWithPredictions, labelColumnName: labelName); + var metrics = mlContext.Regression.Evaluate( + dataWithPredictions, + labelColumnName: labelName); PrintMetrics(metrics); // Expected output @@ -74,10 +83,12 @@ public static void Example() public static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs index 3914d43c03..e02d4c626f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs @@ -10,37 +10,46 @@ public static class OnlineGradientDescent { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.OnlineGradientDescent(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.OnlineGradientDescent( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); - // This trainer is not numerically stable. Please see issue #2425. 
+ // This trainer is not numerically stable. + // Please see issue #2425. // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); @@ -49,7 +58,8 @@ public static void Example() } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -59,12 +69,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -84,10 +96,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.tt index 783f535261..e9579e9121 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.tt @@ -4,9 +4,13 @@ string ClassHeader = null; string ClassName="OnlineGradientDescent"; string ExtraUsing = null; -string Trainer = "OnlineGradientDescent(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"OnlineGradientDescent( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; -string ExpectedOutputPerInstance= @"// This trainer is not numerically stable. Please see issue #2425."; +string ExpectedOutputPerInstance= @"// This trainer is not numerically stable. 
+ // Please see issue #2425."; + string ExpectedOutput = @""; #> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs index 36e3d187ca..59a7d1fdbc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs @@ -11,15 +11,17 @@ public static class OnlineGradientDescentWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -38,34 +40,42 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.OnlineGradientDescent(options); + var pipeline = + mlContext.Regression.Trainers.OnlineGradientDescent(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); - // This trainer is not numerically stable. Please see issue #2425. + // This trainer is not numerically stable. + // Please see issue #2425. // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); PrintMetrics(metrics); - // This trainer is not numerically stable. Please see issue #2425. + // This trainer is not numerically stable. Please see + // issue #2425. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -75,12 +85,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. 
A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -100,10 +112,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.tt index 1616bf8fcb..c794a418a2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.tt @@ -19,6 +19,8 @@ string TrainerOptions = @" OnlineGradientDescentTrainer.Options InitialWeightsDiameter = 0.2f }"; -string ExpectedOutputPerInstance= @"// This trainer is not numerically stable. Please see issue #2425."; -string ExpectedOutput = @"// This trainer is not numerically stable. Please see issue #2425."; +string ExpectedOutputPerInstance= @"// This trainer is not numerically stable. + // Please see issue #2425."; +string ExpectedOutput = @"// This trainer is not numerically stable. 
Please see + // issue #2425."; #> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs index 31ca5cfd9d..1b50d80443 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs @@ -10,33 +10,41 @@ public static class Ols { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.Ols(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.Ols( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -58,7 +66,8 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -68,12 +77,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -93,10 +104,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.tt index b2aed0bae7..909ef50ba0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.tt @@ -4,7 +4,9 @@ string ClassHeader = null; string ClassName="Ols"; string ExtraUsing = null; -string Trainer = "Ols(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"Ols( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresAdvanced.cs index c5c3b2c097..dd09f21fef 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresAdvanced.cs @@ -7,23 +7,29 @@ namespace Samples.Dynamic.Trainers.Regression { public static class 
OrdinaryLeastSquaresAdvanced { - // This example requires installation of additional nuget package Microsoft.ML.Mkl.Components. - // In this examples we will use the housing price dataset. The goal is to predict median home value. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ + // This example requires installation of additional NuGet package + // for Microsoft.ML.Mkl.Components at + // "https://www.nuget.org/packages/Microsoft.ML.Mkl.Components/" + // In this example we will use the housing price dataset. The goal is to + // predict median home value. For more details about this dataset, please + // see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ public static void Example() { - // Downloading a regression dataset from github.com/dotnet/machinelearning - string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + // Downloading a regression dataset from + // github.com/dotnet/machinelearning + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadHousingRegressionDataset(); - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(seed: 3); // Creating a data loader, based on the format of the data // The data is tab separated with all numeric columns. 
// The first column being the label and rest are numeric features // Here only seven numeric columns are used as features - var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options + var dataView = mlContext.Data.LoadFromTextFile(dataFile, + new TextLoader.Options { Separators = new[] { '\t' }, HasHeader = true, @@ -50,8 +56,10 @@ public static void Example() // Check the weights that the model learned var weightsValues = model.Model.Weights; - Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = -0.1682112 - Console.WriteLine($"weight 3 - {weightsValues[3]}"); // CharlesRiver (weight 1) = 3.663493 + Console.WriteLine($"weight 0 - {weightsValues[0]}"); + // CrimesPerCapita (weight 0) = -0.1682112 + Console.WriteLine($"weight 3 - {weightsValues[3]}"); + // CharlesRiver (weight 1) = 3.663493 var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions); @@ -67,10 +75,12 @@ public static void Example() public static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs index c56b71cd33..04b55ecfbe 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs @@ -11,15 +11,17 @@ public static class OlsWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -29,26 +31,32 @@ public static void Example() { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), // Larger values leads to smaller (closer to zero) model parameters. L2Regularization = 0.1f, - // Whether to computate standard error and other statistics of model parameters. + // Whether to compute standard error and other statistics of model + // parameters. CalculateStatistics = false }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.Ols(options); + var pipeline = + mlContext.Regression.Trainers.Ols(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -70,7 +78,8 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -80,12 +89,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -105,10 +116,12 @@ private class Prediction // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.tt index d37761637e..03c4fd961e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.tt @@ -11,7 +11,8 @@ string TrainerOptions = @"OlsTrainer.Options FeatureColumnName = nameof(DataPoint.Features), // Larger values leads to smaller (closer to zero) model parameters. L2Regularization = 0.1f, - // Whether to computate standard error and other statistics of model parameters. + // Whether to computate standard error and other statistics of model + // parameters. 
CalculateStatistics = false }"; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptionsAdvanced.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptionsAdvanced.cs index 69728ac5b9..e53b14cce7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptionsAdvanced.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptionsAdvanced.cs @@ -8,23 +8,28 @@ namespace Samples.Dynamic.Trainers.Regression { public static class OrdinaryLeastSquaresWithOptionsAdvanced { - // This example requires installation of additional nuget package Microsoft.ML.Mkl.Components. - // In this examples we will use the housing price dataset. The goal is to predict median home value. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ + // This example requires installation of additional NuGet package + // for Microsoft.ML.Mkl.Components at + // "https://www.nuget.org/packages/Microsoft.ML.Mkl.Components/" + // In this example we will use the housing price dataset. The goal is to + // predict median home value. For more details about this dataset, please + // see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ public static void Example() { - // Downloading a regression dataset from github.com/dotnet/machinelearning + // Downloading a regression dataset from + // github.com/dotnet/machinelearning string dataFile = DatasetUtils.DownloadHousingRegressionDataset(); - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(seed: 3); // Creating a data loader, based on the format of the data // The data is tab separated with all numeric columns. // The first column being the label and rest are numeric features // Here only seven numeric columns are used as features - var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options + var dataView = mlContext.Data.LoadFromTextFile(dataFile, + new TextLoader.Options { Separators = new[] { '\t' }, HasHeader = true, @@ -45,7 +50,8 @@ public static void Example() // Create the estimator, here we only need OrdinaryLeastSquares trainer // as data is already processed in a form consumable by the trainer - var pipeline = mlContext.Regression.Trainers.Ols(new OlsTrainer.Options() + var pipeline = mlContext.Regression.Trainers.Ols( + new OlsTrainer.Options() { L2Regularization = 0.1f, CalculateStatistics = false @@ -54,8 +60,10 @@ public static void Example() // Check the weights that the model learned var weightsValues = model.Model.Weights; - Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = -0.1783206 - Console.WriteLine($"weight 3 - {weightsValues[3]}"); // CharlesRiver (weight 1) = 3.118422 + Console.WriteLine($"weight 0 - {weightsValues[0]}"); + // CrimesPerCapita (weight 0) = -0.1783206 + Console.WriteLine($"weight 3 - {weightsValues[3]}"); + // CharlesRiver (weight 1) = 3.118422 var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions); @@ -71,10 +79,12 @@ public static void Example() public static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + 
metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs index 90cf94db2a..dfa04d1b76 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PermutationFeatureImportance.cs @@ -9,8 +9,9 @@ public static class PermutationFeatureImportance { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(seed:1); // Create sample data. @@ -19,10 +20,14 @@ public static void Example() // Load the sample data as an IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Define a training pipeline that concatenates features into a vector, normalizes them, and then - // trains a linear model. - var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; - var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns) + // Define a training pipeline that concatenates features into a vector, + // normalizes them, and then trains a linear model. 
+ var featureColumns = new string[] { nameof(Data.Feature1), + nameof(Data.Feature2) }; + + var pipeline = mlContext.Transforms.Concatenate( + "Features", + featureColumns) .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Regression.Trainers.Ols()); @@ -35,18 +40,29 @@ public static void Example() // Extract the predictor. var linearPredictor = model.LastTransformer; - // Compute the permutation metrics for the linear model using the normalized data. - var permutationMetrics = mlContext.Regression.PermutationFeatureImportance( + // Compute the permutation metrics for the linear model using the + // normalized data. + var permutationMetrics = mlContext.Regression + .PermutationFeatureImportance( linearPredictor, transformedData, permutationCount: 30); - // Now let's look at which features are most important to the model overall. - // Get the feature indices sorted by their impact on RMSE. - var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.RootMeanSquaredError}) - .OrderByDescending(feature => Math.Abs(feature.RootMeanSquaredError.Mean)) + // Now let's look at which features are most important to the model + // overall. Get the feature indices sorted by their impact on RMSE. 
+ var sortedIndices = permutationMetrics + .Select((metrics, index) => new { index, + metrics.RootMeanSquaredError}) + + .OrderByDescending(feature => Math.Abs( + feature.RootMeanSquaredError.Mean)) + .Select(feature => feature.index); - Console.WriteLine("Feature\tModel Weight\tChange in RMSE\t95% Confidence in the Mean Change in RMSE"); - var rmse = permutationMetrics.Select(x => x.RootMeanSquaredError).ToArray(); + Console.WriteLine("Feature\tModel Weight\tChange in RMSE\t95%" + + "Confidence in the Mean Change in RMSE"); + + var rmse = permutationMetrics.Select(x => x.RootMeanSquaredError) + .ToArray(); + foreach (int i in sortedIndices) { Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}", @@ -76,10 +92,14 @@ private class Data /// linear combination of the features. /// /// The number of examples. - /// The bias, or offset, in the calculation of the label. - /// The weight to multiply the first feature with to compute the label. - /// The weight to multiply the second feature with to compute the label. - /// The seed for generating feature values and label noise. + /// The bias, or offset, in the calculation of the label. + /// + /// The weight to multiply the first feature with to + /// compute the label. + /// The weight to multiply the second feature with to + /// compute the label. + /// The seed for generating feature values and label + /// noise. /// An enumerable of Data objects. private static IEnumerable GenerateData(int nExamples = 10000, double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1) @@ -94,7 +114,8 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a noisy label. 
- data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5); + data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * + data.Feature2 + rng.NextDouble() - 0.5); yield return data; } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude index f4193eefd4..6b8d3ff2b4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude @@ -16,15 +16,17 @@ namespace Samples.Dynamic.Trainers.Regression <# } #> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (TrainerOptions == null) { #> @@ -35,22 +37,27 @@ namespace Samples.Dynamic.Trainers.Regression var options = new <#=TrainerOptions#>; // Define the trainer. 
- var pipeline = mlContext.Regression.Trainers.<#=Trainer#>(options); + var pipeline = + mlContext.Regression.Trainers.<#=Trainer#>(options); <# } #> // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -63,7 +70,8 @@ namespace Samples.Dynamic.Trainers.Regression <#=ExpectedOutput#> } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -73,12 +81,14 @@ namespace Samples.Dynamic.Trainers.Regression { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. 
+ // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -98,10 +108,12 @@ namespace Samples.Dynamic.Trainers.Regression // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.cs index 460334f283..4247c1447e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.cs @@ -10,33 +10,41 @@ public static class Sdca { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Regression.Trainers.Sdca(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features)); + var pipeline = mlContext.Regression.Trainers.Sdca( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -58,7 +66,8 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. 
The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -68,12 +77,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -93,10 +104,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.tt index 2a74a40116..bd1581da6b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Sdca.tt @@ -4,7 +4,9 @@ string ClassHeader = null; 
string ClassName="Sdca"; string ExtraUsing = null; -string Trainer = "Sdca(labelColumnName: nameof(DataPoint.Label), featureColumnName: nameof(DataPoint.Features))"; +string Trainer = @"Sdca( + labelColumnName: nameof(DataPoint.Label), + featureColumnName: nameof(DataPoint.Features))"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.cs index c5c06350e7..55e05ab3a1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.cs @@ -11,15 +11,17 @@ public static class SdcaWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -27,31 +29,38 @@ public static void Example() { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Make the convergence tolerance tighter. 
It effectively leads to more training iterations. + // Make the convergence tolerance tighter. It effectively leads to + // more training iterations. ConvergenceTolerance = 0.02f, - // Increase the maximum number of passes over training data. Similar to ConvergenceTolerance, - // this value specifics the hard iteration limit on the training algorithm. + // Increase the maximum number of passes over training data. Similar + // to ConvergenceTolerance, this value specifics the hard iteration + // limit on the training algorithm. MaximumNumberOfIterations = 30, // Increase learning rate for bias. BiasLearningRate = 0.1f }; // Define the trainer. - var pipeline = mlContext.Regression.Trainers.Sdca(options); + var pipeline = + mlContext.Regression.Trainers.Sdca(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(5, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data.LoadFromEnumerable( + GenerateRandomDataPoints(5, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable( + transformedTestData, reuseRowObject: false).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); @@ -73,7 +82,8 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. 
The worest case is 0) } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -83,12 +93,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 50).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 50).Select( + x => x + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -108,10 +120,12 @@ private class Prediction // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + metrics.MeanAbsoluteError); + Console.WriteLine("Mean Squared Error: " + metrics.MeanSquaredError); + Console.WriteLine( + "Root Mean Squared Error: " + metrics.RootMeanSquaredError); + + Console.WriteLine("RSquared: " + metrics.RSquared); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.tt index 083efe8cf6..5c350caf66 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.tt +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/SdcaWithOptions.tt @@ -9,10 +9,12 @@ string TrainerOptions = @"SdcaRegressionTrainer.Options { LabelColumnName = nameof(DataPoint.Label), FeatureColumnName = nameof(DataPoint.Features), - // Make the convergence tolerance tighter. It effectively leads to more training iterations. + // Make the convergence tolerance tighter. It effectively leads to + // more training iterations. ConvergenceTolerance = 0.02f, - // Increase the maximum number of passes over training data. Similar to ConvergenceTolerance, - // this value specifics the hard iteration limit on the training algorithm. + // Increase the maximum number of passes over training data. Similar + // to ConvergenceTolerance, this value specifics the hard iteration + // limit on the training algorithm. MaximumNumberOfIterations = 30, // Increase learning rate for bias. BiasLearningRate = 0.1f