diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyONNXModelWithInMemoryImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyONNXModelWithInMemoryImages.cs index 430ad8fbfc..4c1c3a3d4b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyONNXModelWithInMemoryImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyONNXModelWithInMemoryImages.cs @@ -15,44 +15,61 @@ public static void Example() // Download the squeeznet image model from ONNX model zoo, version 1.2 // https://github.com/onnx/models/tree/master/squeezenet or use // Microsoft.ML.Onnx.TestModels nuget. - // It's a multiclass classifier. It consumes an input "data_0" and produces - // an output "softmaxout_1". + // It's a multiclass classifier. It consumes an input "data_0" and + // produces an output "softmaxout_1". var modelPath = @"squeezenet\00000001\model.onnx"; // Create ML pipeline to score the data using OnnxScoringEstimator var mlContext = new MLContext(); - // Create in-memory data points. Its Image/Scores field is the input/output of the used ONNX model. + // Create in-memory data points. Its Image/Scores field is the + // input /output of the used ONNX model. var dataPoints = new ImageDataPoint[] { new ImageDataPoint(Color.Red), new ImageDataPoint(Color.Green) }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // Create a ML.NET pipeline which contains two steps. First, ExtractPixle is used to convert the 224x224 image to a 3x224x224 float tensor. - // Then the float tensor is fed into a ONNX model with an input called "data_0" and an output called "softmaxout_1". Note that "data_0" and - // "softmaxout_1" are model input and output names stored in the used ONNX model file. 
Users may need to inspect their own models to
-            // get the right input and output column names.
-            var pipeline = mlContext.Transforms.ExtractPixels("data_0", "Image")                // Map column "Image" to column "data_0"
-                .Append(mlContext.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelPath)); // Map column "data_0" to column "softmaxout_1"
+            // Create a ML.NET pipeline which contains two steps. First,
+            // ExtractPixels is used to convert the 224x224 image to a 3x224x224
+            // float tensor. Then the float tensor is fed into an ONNX model with an
+            // input called "data_0" and an output called "softmaxout_1". Note that
+            // "data_0" and "softmaxout_1" are model input and output names stored
+            // in the used ONNX model file. Users may need to inspect their own
+            // models to get the right input and output column names.
+            // Map column "Image" to column "data_0"
+            // Map column "data_0" to column "softmaxout_1"
+            var pipeline = mlContext.Transforms.ExtractPixels("data_0", "Image")
+                .Append(mlContext.Transforms.ApplyOnnxModel("softmaxout_1",
+                "data_0", modelPath));
+
             var model = pipeline.Fit(dataView);
             var onnx = model.Transform(dataView);

-            // Convert IDataView back to IEnumerable so that user can inspect the output, column "softmaxout_1", of the ONNX transform.
-            // Note that Column "softmaxout_1" would be stored in ImageDataPont.Scores because the added attributed [ColumnName("softmaxout_1")]
-            // tells that ImageDataPont.Scores is equivalent to column "softmaxout_1".
-            var transformedDataPoints = mlContext.Data.CreateEnumerable(onnx, false).ToList();
+            // Convert IDataView back to IEnumerable so that user
+            // can inspect the output, column "softmaxout_1", of the ONNX transform.
+            // Note that Column "softmaxout_1" would be stored in ImageDataPoint
+            // .Scores because the added attribute [ColumnName("softmaxout_1")]
+            // tells that ImageDataPoint.Scores is equivalent to column
+            // "softmaxout_1". 
+ var transformedDataPoints = mlContext.Data.CreateEnumerable< + ImageDataPoint>(onnx, false).ToList(); - // The scores are probabilities of all possible classes, so they should all be positive. + // The scores are probabilities of all possible classes, so they should + // all be positive. foreach (var dataPoint in transformedDataPoints) { var firstClassProb = dataPoint.Scores.First(); var lastClassProb = dataPoint.Scores.Last(); - Console.WriteLine($"The probability of being the first class is {firstClassProb * 100}%."); - Console.WriteLine($"The probability of being the last class is {lastClassProb * 100}%."); + Console.WriteLine("The probability of being the first class is " + + (firstClassProb * 100) + "%."); + + Console.WriteLine($"The probability of being the last class is " + + (lastClassProb * 100) + "%."); } // Expected output: @@ -62,7 +79,8 @@ public static void Example() // The probability of being the last class is 0.394428%. } - // This class is used in Example() to describe data points which will be consumed by ML.NET pipeline. + // This class is used in Example() to describe data points which will be + // consumed by ML.NET pipeline. private class ImageDataPoint { // Height of Image. @@ -75,9 +93,9 @@ private class ImageDataPoint [ImageType(height, width)] public Bitmap Image { get; set; } - // Expected output of ONNX model. It contains probabilities of all classes. - // Note that the ColumnName below should match the output name in the used - // ONNX model file. + // Expected output of ONNX model. It contains probabilities of all + // classes. Note that the ColumnName below should match the output name + // in the used ONNX model file. 
[ColumnName("softmaxout_1")] public float[] Scores { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs index abff3a339d..adb2268060 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs @@ -19,14 +19,16 @@ public static void Example() // Generate sample test data. var samples = GetTensorData(); - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); // Create the pipeline to score using provided onnx model. var pipeline = mlContext.Transforms.ApplyOnnxModel(modelPath); // Fit the pipeline and get the transformed values var transformedValues = pipeline.Fit(data).Transform(data); // Retrieve model scores into Prediction class - var predictions = mlContext.Data.CreateEnumerable(transformedValues, reuseRowObject: false); + var predictions = mlContext.Data.CreateEnumerable( + transformedValues, reuseRowObject: false); // Iterate rows foreach (var prediction in predictions) @@ -34,7 +36,8 @@ public static void Example() int numClasses = 0; foreach (var classScore in prediction.softmaxout_1.Take(3)) { - Console.WriteLine($"Class #{numClasses++} score = {classScore}"); + Console.WriteLine("Class #" + numClasses++ + " score = " + + classScore); } Console.WriteLine(new string('-', 10)); } @@ -65,9 +68,14 @@ public class TensorData public static TensorData[] GetTensorData() { // This can be any numerical data. Assume image pixel values. 
- var image1 = Enumerable.Range(0, inputSize).Select(x => (float)x / inputSize).ToArray(); - var image2 = Enumerable.Range(0, inputSize).Select(x => (float)(x + 10000) / inputSize).ToArray(); - return new TensorData[] { new TensorData() { data_0 = image1 }, new TensorData() { data_0 = image2 } }; + var image1 = Enumerable.Range(0, inputSize).Select(x => (float)x / + inputSize).ToArray(); + + var image2 = Enumerable.Range(0, inputSize).Select(x => (float)(x + + 10000) / inputSize).ToArray(); + + return new TensorData[] { new TensorData() { data_0 = image1 }, new + TensorData() { data_0 = image2 } }; } // Class to contain the output values from the transformation. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs index ce7152b50f..7b21512883 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -9,11 +9,12 @@ namespace Samples.Dynamic { public static class ApproximatedKernelMap { - // Transform feature vector to another non-linear space. See https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf. + // Transform feature vector to another non-linear space. See + // https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext();

             var samples = new List() {
                 new DataPoint(){ Features = new float[7] {-1, 1, 0,-1,-1, 0,-1} },
                 new DataPoint(){ Features = new float[7] { 0,-1, 0, 1, 0,-1,-1} }
             };
-            // Convert training data to IDataView, the general data type used in ML.NET.
+            // Convert training data to IDataView, the general data type used in
+            // ML.NET.
             var data = mlContext.Data.LoadFromEnumerable(samples);

-            // ApproximatedKernel map takes data and maps it's to a random low-dimensional space.
-            var approximation = mlContext.Transforms.ApproximatedKernelMap("Features", rank: 4, generator: new GaussianKernel(gamma: 0.7f), seed: 1);
+            // ApproximatedKernel map takes data and maps it to a random
+            // low-dimensional space.
+            var approximation = mlContext.Transforms.ApproximatedKernelMap(
+                "Features", rank: 4, generator: new GaussianKernel(gamma: 0.7f),
+                seed: 1);

-            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
-            // This operation doesn't actually evaluate data until we read the data below.
+            // Now we can transform the data and look at the output to confirm the
+            // behavior of the estimator. This operation doesn't actually evaluate
+            // data until we read the data below. 
var tansformer = approximation.Fit(data); var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); + // Expected output: // -0.0119, 0.5867, 0.4942, 0.7041 // 0.4720, 0.5639, 0.4346, 0.2671 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContribution.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContribution.cs index 3a342f8809..2f0f5b464e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContribution.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContribution.cs @@ -9,7 +9,8 @@ public static class CalculateFeatureContribution { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, // as a catalog of available operations and as the source of randomness. var mlContext = new MLContext(seed: 1); @@ -19,7 +20,8 @@ public static void Example() // Convert training data to IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Create a pipeline to concatenate the features into a feature vector and normalize it. + // Create a pipeline to concatenate the features into a feature vector + // and normalize it. var transformPipeline = mlContext.Transforms.Concatenate("Features", new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }) .Append(mlContext.Transforms.NormalizeMeanVariance("Features")); @@ -37,28 +39,45 @@ public static void Example() var linearModel = linearTrainer.Fit(transformedData); // Print the model parameters. 
Console.WriteLine($"Linear Model Parameters");
-            Console.WriteLine($"Bias: {linearModel.Model.Bias} Feature1: {linearModel.Model.Weights[0]} Feature2: {linearModel.Model.Weights[1]}");
+            Console.WriteLine("Bias: " + linearModel.Model.Bias + " Feature1: "
+                + linearModel.Model.Weights[0] + " Feature2: " + linearModel.Model
+                .Weights[1]);

-            // Define a feature contribution calculator for all the features, and don't normalize the contributions.
-            // These are "trivial estimators" and they don't need to fit to the data, so we can feed a subset.
-            var simpleScoredDataset = linearModel.Transform(mlContext.Data.TakeRows(transformedData, 1));
-            var linearFeatureContributionCalculator = mlContext.Transforms.CalculateFeatureContribution(linearModel, normalize: false).Fit(simpleScoredDataset);
+            // Define a feature contribution calculator for all the features, and
+            // don't normalize the contributions. These are "trivial estimators" and
+            // they don't need to fit to the data, so we can feed a subset.
+            var simpleScoredDataset = linearModel.Transform(mlContext.Data
+                .TakeRows(transformedData, 1));
+
+            var linearFeatureContributionCalculator = mlContext.Transforms
+                .CalculateFeatureContribution(linearModel, normalize: false).Fit(
+                simpleScoredDataset);

             // Create a transformer chain to describe the entire pipeline.
-            var scoringPipeline = transformer.Append(linearModel).Append(linearFeatureContributionCalculator);
+            var scoringPipeline = transformer.Append(linearModel).Append(
+                linearFeatureContributionCalculator);

-            // Create the prediction engine to get the features extracted from the text.
-            var predictionEngine = mlContext.Model.CreatePredictionEngine(scoringPipeline);
+            // Create the prediction engine to get the features extracted from the
+            // text.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine(scoringPipeline);

             // Convert the text into numeric features. 
var prediction = predictionEngine.Predict(samples.First()); // Write out the prediction, with contributions. - // Note that for the linear model, the feature contributions for a feature in an example is the feature-weight*feature-value. + // Note that for the linear model, the feature contributions for a + // feature in an example is the feature-weight*feature-value. // The total prediction is thus the bias plus the feature contributions. - Console.WriteLine($"Label: {prediction.Label} Prediction: {prediction.Score}"); - Console.WriteLine($"Feature1: {prediction.Features[0]} Feature2: {prediction.Features[1]}"); - Console.WriteLine($"Feature Contributions: {prediction.FeatureContributions[0]} {prediction.FeatureContributions[1]}"); + Console.WriteLine("Label: " + prediction.Label + " Prediction: " + + prediction.Score); + + Console.WriteLine("Feature1: " + prediction.Features[0] + + " Feature2: " + prediction.Features[1]); + + Console.WriteLine("Feature Contributions: " + prediction + .FeatureContributions[0] + " " + prediction + .FeatureContributions[1]); // Expected output: // Linear Model Parameters @@ -107,7 +126,9 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a noisy label. 
- data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5); + data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * + data.Feature2 + rng.NextDouble() - 0.5); + yield return data; } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContributionCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContributionCalibrated.cs index c05e7e5468..adaf66389c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContributionCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContributionCalibrated.cs @@ -9,8 +9,9 @@ public static class CalculateFeatureContributionCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(); // Create a small dataset. @@ -19,7 +20,8 @@ public static void Example() // Convert training data to IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Create a pipeline to concatenate the features into a feature vector and normalize it. + // Create a pipeline to concatenate the features into a feature vector + // and normalize it. var transformPipeline = mlContext.Transforms.Concatenate("Features", new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }) .Append(mlContext.Transforms.NormalizeMeanVariance("Features")); @@ -31,7 +33,8 @@ public static void Example() var transformedData = transformer.Transform(data); // Define a linear trainer. 
- var linearTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(); + var linearTrainer = mlContext.BinaryClassification.Trainers + .SdcaLogisticRegression(); // Now we train the model and score it on the transformed data. var linearModel = linearTrainer.Fit(transformedData); @@ -42,26 +45,42 @@ public static void Example() linearModel.Model.SubModel.Weights[0], linearModel.Model.SubModel.Weights[1]); - // Define a feature contribution calculator for all the features, and don't normalize the contributions. - // These are "trivial estimators" and they don't need to fit to the data, so we can feed a subset. - var simpleScoredDataset = linearModel.Transform(mlContext.Data.TakeRows(transformedData, 1)); - var linearFeatureContributionCalculator = mlContext.Transforms.CalculateFeatureContribution(linearModel, normalize: false).Fit(simpleScoredDataset); + // Define a feature contribution calculator for all the features, and + // don't normalize the contributions. These are "trivial estimators" and + // they don't need to fit to the data, so we can feed a subset. + var simpleScoredDataset = linearModel.Transform(mlContext.Data + .TakeRows(transformedData, 1)); + + var linearFeatureContributionCalculator = mlContext.Transforms + .CalculateFeatureContribution(linearModel, normalize: false) + .Fit(simpleScoredDataset); // Create a transformer chain to describe the entire pipeline. - var scoringPipeline = transformer.Append(linearModel).Append(linearFeatureContributionCalculator); + var scoringPipeline = transformer.Append(linearModel) + .Append(linearFeatureContributionCalculator); - // Create the prediction engine to get the features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(scoringPipeline); + // Create the prediction engine to get the features extracted from the + // text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(scoringPipeline); // Convert the text into numeric features. 
var prediction = predictionEngine.Predict(samples.First()); // Write out the prediction, with contributions. - // Note that for the linear model, the feature contributions for a feature in an example is the feature-weight*feature-value. - // The total prediction is thus the bias plus the feature contributions. - Console.WriteLine($"Label: {prediction.Label} Prediction-Score: {prediction.Score} Prediction-Probability: {prediction.Probability}"); - Console.WriteLine($"Feature1: {prediction.Features[0]} Feature2: {prediction.Features[1]}"); - Console.WriteLine($"Feature Contributions: {prediction.FeatureContributions[0]} {prediction.FeatureContributions[1]}"); + // Note that for the linear model, the feature contributions for a + // feature in an example is the feature-weight*feature-value. The total + // prediction is thus the bias plus the feature contributions. + Console.WriteLine("Label: " + prediction.Label + " Prediction-Score: " + + prediction.Score + " Prediction-Probability: " + prediction + .Probability); + + Console.WriteLine("Feature1: " + prediction.Features[0] + " Feature2: " + + prediction.Features[1]); + + Console.WriteLine("Feature Contributions: " + prediction + .FeatureContributions[0] + " " + prediction + .FeatureContributions[1]); // Expected output: // Linear Model Parameters @@ -114,7 +133,9 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a Boolean label with noise. 
- var value = bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5; + var value = bias + weight1 * data.Feature1 + weight2 * data.Feature2 + + rng.NextDouble() - 0.5; + data.Label = Sigmoid(value) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs index 9b350227be..c62cc6b743 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs @@ -9,38 +9,56 @@ public static class Concatenate { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. 
var samples = new List() { - new InputData(){ Feature1 = 0.1f, Feature2 = new[]{ 1.1f, 2.1f, 3.1f}, Feature3 = 1 }, - new InputData(){ Feature1 = 0.2f, Feature2 = new[]{ 1.2f, 2.2f, 3.2f}, Feature3 = 2 }, - new InputData(){ Feature1 = 0.3f, Feature2 = new[]{ 1.3f, 2.3f, 3.3f}, Feature3 = 3 }, - new InputData(){ Feature1 = 0.4f, Feature2 = new[]{ 1.4f, 2.4f, 3.4f}, Feature3 = 4 }, - new InputData(){ Feature1 = 0.5f, Feature2 = new[]{ 1.5f, 2.5f, 3.5f}, Feature3 = 5 }, - new InputData(){ Feature1 = 0.6f, Feature2 = new[]{ 1.6f, 2.6f, 3.6f}, Feature3 = 6 }, + new InputData(){ Feature1 = 0.1f, Feature2 = new[]{ 1.1f, 2.1f, + 3.1f }, Feature3 = 1 }, + + new InputData(){ Feature1 = 0.2f, Feature2 = new[]{ 1.2f, 2.2f, + 3.2f }, Feature3 = 2 }, + + new InputData(){ Feature1 = 0.3f, Feature2 = new[]{ 1.3f, 2.3f, + 3.3f }, Feature3 = 3 }, + + new InputData(){ Feature1 = 0.4f, Feature2 = new[]{ 1.4f, 2.4f, + 3.4f }, Feature3 = 4 }, + + new InputData(){ Feature1 = 0.5f, Feature2 = new[]{ 1.5f, 2.5f, + 3.5f }, Feature3 = 5 }, + + new InputData(){ Feature1 = 0.6f, Feature2 = new[]{ 1.6f, 2.6f, + 3.6f }, Feature3 = 6 }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); - // A pipeline for concatenating the "Feature1", "Feature2" and "Feature3" columns together into a vector that will be the Features column. - // Concatenation is necessary because trainers take feature vectors as inputs. + // A pipeline for concatenating the "Feature1", "Feature2" and + // "Feature3" columns together into a vector that will be the Features + // column. Concatenation is necessary because trainers take feature + // vectors as inputs. // - // Please note that the "Feature3" column is converted from int32 to float using the ConvertType. - // The Concatenate requires all columns to be of same type. 
- var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature3", outputKind: DataKind.Single) - .Append(mlContext.Transforms.Concatenate("Features", new[] { "Feature1", "Feature2", "Feature3" })); + // Please note that the "Feature3" column is converted from int32 to + // float using the ConvertType. The Concatenate requires all columns to + // be of same type. + var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature3", + outputKind: DataKind.Single) + .Append(mlContext.Transforms.Concatenate("Features", new[] + { "Feature1", "Feature2", "Feature3" })); // The transformed data. var transformedData = pipeline.Fit(dataview).Transform(dataview); // Now let's take a look at what this concatenation did. - // We can extract the newly created column as an IEnumerable of TransformedData. - var featuresColumn = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // TransformedData. + var featuresColumn = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); // And we can write out a few rows Console.WriteLine($"Features column obtained post-transformation."); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs index 212292f087..a4a7498e0c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs @@ -8,40 +8,59 @@ public static class CopyColumns { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. 
var samples = new List() { - new InputData(){ ImageId = 1, Features = new [] { 1.0f, 1.0f, 1.0f} }, - new InputData(){ ImageId = 2, Features = new [] { 2.0f, 2.0f, 2.0f} }, - new InputData(){ ImageId = 3, Features = new [] { 3.0f, 3.0f, 3.0f} }, - new InputData(){ ImageId = 4, Features = new [] { 4.0f, 4.0f, 4.0f} }, - new InputData(){ ImageId = 5, Features = new [] { 5.0f, 5.0f, 5.0f} }, - new InputData(){ ImageId = 6, Features = new [] { 6.0f, 6.0f, 6.0f} }, + new InputData(){ ImageId = 1, Features = new [] { 1.0f, 1.0f, + 1.0f } }, + + new InputData(){ ImageId = 2, Features = new [] { 2.0f, 2.0f, + 2.0f } }, + + new InputData(){ ImageId = 3, Features = new [] { 3.0f, 3.0f, + 3.0f } }, + + new InputData(){ ImageId = 4, Features = new [] { 4.0f, 4.0f, + 4.0f } }, + + new InputData(){ ImageId = 5, Features = new [] { 5.0f, 5.0f, + 5.0f } }, + + new InputData(){ ImageId = 6, Features = new [] { 6.0f, 6.0f, + 6.0f } }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // CopyColumns is commonly used to rename columns. - // For example, if you want to train towards ImageId, and your trainer expects a "Label" column, you can - // use CopyColumns to rename ImageId to Label. Technically, the ImageId column still exists, but it won't be - // materialized unless you actually need it somewhere (e.g. if you were to save the transformed data - // without explicitly dropping the column). This is a general property of IDataView's lazy evaluation. + // For example, if you want to train towards ImageId, and your trainer + // expects a "Label" column, you can use CopyColumns to rename ImageId + // to Label. Technically, the ImageId column still exists, but it won't + // be materialized unless you actually need it somewhere (e.g. if you + // were to save the transformed data without explicitly dropping the + // column). This is a general property of IDataView's lazy evaluation. 
var pipeline = mlContext.Transforms.CopyColumns("Label", "ImageId"); - // Now we can transform the data and look at the output to confirm the behavior of CopyColumns. - // Don't forget that this operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of CopyColumns. Don't forget that this operation doesn't + // actually evaluate data until we read the data below. var transformedData = pipeline.Fit(dataview).Transform(dataview); - // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below. - var rowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleInfertDataTransformed, the class we define below. + var rowEnumerable = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. + Console.WriteLine($"Label and ImageId columns obtained " + + $"post-transformation."); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. - Console.WriteLine($"Label and ImageId columns obtained post-transformation."); foreach (var row in rowEnumerable) Console.WriteLine($"Label: {row.Label} ImageId: {row.ImageId}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs index 9a0e7aabf3..c3ad19caaf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs @@ -8,8 +8,8 @@ public static class CustomMapping { public static void Example() { - // Create a new ML context, for ML.NET operations. 
It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -22,21 +22,27 @@ public static void Example() }; var data = mlContext.Data.LoadFromEnumerable(samples); - // We define the custom mapping between input and output rows that will be applied by the transformation. + // We define the custom mapping between input and output rows that will + // be applied by the transformation. Action mapping = (input, output) => output.IsUnderThirty = input.Age < 30; - // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators. - // Note: If contractName is null in the CustomMapping estimator, any pipeline of estimators containing it, - // cannot be saved and loaded back. - var pipeline = mlContext.Transforms.CustomMapping(mapping, contractName: null); + // Custom transformations can be used to transform data directly, or as + // part of a pipeline of estimators. Note: If contractName is null in + // the CustomMapping estimator, any pipeline of estimators containing + // it, cannot be saved and loaded back. + var pipeline = mlContext.Transforms.CustomMapping(mapping, contractName: + null); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var transformer = pipeline.Fit(data); var transformedData = transformer.Transform(data); - var dataEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: true); + var dataEnumerable = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: true); + Console.WriteLine("Age\t IsUnderThirty"); foreach (var row in dataEnumerable) Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}"); @@ -49,7 +55,8 @@ public static void Example() // 28 True } - // Defines only the column to be generated by the custom mapping transformation in addition to the columns already present. + // Defines only the column to be generated by the custom mapping + // transformation in addition to the columns already present. private class CustomMappingOutput { public bool IsUnderThirty { get; set; } @@ -61,7 +68,8 @@ private class InputData public float Age { get; set; } } - // Defines the schema of the transformed data, which includes the new column IsUnderThirty. + // Defines the schema of the transformed data, which includes the new column + // IsUnderThirty. private class TransformedData : InputData { public bool IsUnderThirty { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs index 8bc4439190..745169ef4e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs @@ -9,8 +9,8 @@ public static class CustomMappingSaveAndLoad { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -23,23 +23,35 @@ public static void Example() }; var data = mlContext.Data.LoadFromEnumerable(samples); - // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators. - var pipeline = mlContext.Transforms.CustomMapping(new IsUnderThirtyCustomAction().GetMapping(), contractName: "IsUnderThirty"); + // Custom transformations can be used to transform data directly, or as + // part of a pipeline of estimators. + var pipeline = mlContext.Transforms.CustomMapping(new + IsUnderThirtyCustomAction().GetMapping(), contractName: + "IsUnderThirty"); + var transformer = pipeline.Fit(data); - // To save and load the CustomMapping estimator, the assembly in which the custom action is defined needs to be registered in the - // environment. The following registers the assembly where IsUnderThirtyCustomAction is defined. - mlContext.ComponentCatalog.RegisterAssembly(typeof(IsUnderThirtyCustomAction).Assembly); + // To save and load the CustomMapping estimator, the assembly in which + // the custom action is defined needs to be registered in the + // environment. The following registers the assembly where + // IsUnderThirtyCustomAction is defined. + mlContext.ComponentCatalog.RegisterAssembly(typeof( + IsUnderThirtyCustomAction).Assembly); - // Now the transform pipeline can be saved and loaded through the usual MLContext method. + // Now the transform pipeline can be saved and loaded through the usual + // MLContext method. mlContext.Model.Save(transformer, data.Schema, "customTransform.zip"); - var loadedTransform = mlContext.Model.Load("customTransform.zip", out var inputSchema); + var loadedTransform = mlContext.Model.Load("customTransform.zip", out + var inputSchema); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. 
- // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var transformedData = loadedTransform.Transform(data); - var dataEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: true); + var dataEnumerable = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: true); + Console.WriteLine("Age\tIsUnderThirty"); foreach (var row in dataEnumerable) Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}"); @@ -52,21 +64,25 @@ public static void Example() // 28 True } - // The custom action needs to implement the abstract class CustomMappingFactory, and needs to have attribute - // CustomMappingFactoryAttribute with argument equal to the contractName used to define the CustomMapping estimator - // which uses the action. + // The custom action needs to implement the abstract class + // CustomMappingFactory, and needs to have attribute + // CustomMappingFactoryAttribute with argument equal to the contractName + // used to define the CustomMapping estimator which uses the action. [CustomMappingFactoryAttribute("IsUnderThirty")] - private class IsUnderThirtyCustomAction : CustomMappingFactory + private class IsUnderThirtyCustomAction : CustomMappingFactory { - // We define the custom mapping between input and output rows that will be applied by the transformation. - public static void CustomAction(InputData input, CustomMappingOutput output) - => output.IsUnderThirty = input.Age < 30; + // We define the custom mapping between input and output rows that will + // be applied by the transformation. 
+ public static void CustomAction(InputData input, CustomMappingOutput + output) => output.IsUnderThirty = input.Age < 30; public override Action GetMapping() => CustomAction; } - // Defines only the column to be generated by the custom mapping transformation in addition to the columns already present. + // Defines only the column to be generated by the custom mapping + // transformation in addition to the columns already present. private class CustomMappingOutput { public bool IsUnderThirty { get; set; } @@ -78,7 +94,8 @@ private class InputData public float Age { get; set; } } - // Defines the schema of the transformed data, which includes the new column IsUnderThirty. + // Defines the schema of the transformed data, which includes the new column + // IsUnderThirty. private class TransformedData : InputData { public bool IsUnderThirty { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingWithInMemoryCustomType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingWithInMemoryCustomType.cs index 688c5d1fe5..37fd961258 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingWithInMemoryCustomType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingWithInMemoryCustomType.cs @@ -12,34 +12,46 @@ static public void Example() { var mlContext = new MLContext(); // Build in-memory data. - var tribe = new List() { new AlienHero("ML.NET", 2, 1000, 2000, 3000, 4000, 5000, 6000, 7000) }; + var tribe = new List() { new AlienHero("ML.NET", 2, 1000, + 2000, 3000, 4000, 5000, 6000, 7000) }; // Build a ML.NET pipeline and make prediction. 
var tribeDataView = mlContext.Data.LoadFromEnumerable(tribe); - var pipeline = mlContext.Transforms.CustomMapping(AlienFusionProcess.GetMapping(), contractName: null); + var pipeline = mlContext.Transforms.CustomMapping(AlienFusionProcess + .GetMapping(), contractName: null); + var model = pipeline.Fit(tribeDataView); var tribeTransformed = model.Transform(tribeDataView); // Print out prediction produced by the model. - var firstAlien = mlContext.Data.CreateEnumerable(tribeTransformed, false).First(); - Console.WriteLine($"We got a super alien with name {firstAlien.Name}, age {firstAlien.Merged.Age}, " + - $"height {firstAlien.Merged.Height}, weight {firstAlien.Merged.Weight}, and {firstAlien.Merged.HandCount} hands."); + var firstAlien = mlContext.Data.CreateEnumerable( + tribeTransformed, false).First(); + + Console.WriteLine("We got a super alien with name " + firstAlien.Name + + ", age " + firstAlien.Merged.Age + ", " + "height " + firstAlien + .Merged.Height + ", weight " + firstAlien.Merged.Weight + ", and " + + firstAlien.Merged.HandCount + " hands."); // Expected output: // We got a super alien with name Super Unknown, age 4002, height 6000, weight 8000, and 10000 hands. // Create a prediction engine and print out its prediction. 
- var engine = mlContext.Model.CreatePredictionEngine(model); + var engine = mlContext.Model.CreatePredictionEngine(model); + var alien = new AlienHero("TEN.LM", 1, 2, 3, 4, 5, 6, 7, 8); var superAlien = engine.Predict(alien); - Console.Write($"We got a super alien with name {superAlien.Name}, age {superAlien.Merged.Age}, " + - $"height {superAlien.Merged.Height}, weight {superAlien.Merged.Weight}, and {superAlien.Merged.HandCount} hands."); + Console.Write("We got a super alien with name " + superAlien.Name + + ", age " + superAlien.Merged.Age + ", height " + + superAlien.Merged.Height + ", weight " + superAlien.Merged.Weight + + ", and " + superAlien.Merged.HandCount + " hands."); // Expected output: // We got a super alien with name Super Unknown, age 6, height 8, weight 10, and 12 hands. } - // A custom type which ML.NET doesn't know yet. Its value will be loaded as a DataView column in this test. + // A custom type which ML.NET doesn't know yet. Its value will be loaded as + // a DataView column in this test. private class AlienBody { public int Age { get; set; } @@ -67,11 +79,12 @@ public AlienTypeAttributeAttribute(int raceId) RaceId = raceId; } - // A function implicitly invoked by ML.NET when processing a custom type. - // It binds a DataViewType to a custom type plus its attributes. + // A function implicitly invoked by ML.NET when processing a custom + // type. It binds a DataViewType to a custom type plus its attributes. public override void Register() { - DataViewTypeManager.Register(new DataViewAlienBodyType(RaceId), typeof(AlienBody), new[] { this }); + DataViewTypeManager.Register(new DataViewAlienBodyType(RaceId), + typeof(AlienBody), new[] { this }); } public override bool Equals(DataViewTypeAttribute other) @@ -84,11 +97,14 @@ public override bool Equals(DataViewTypeAttribute other) public override int GetHashCode() => RaceId.GetHashCode(); } - // A custom class with a type which ML.NET doesn't know yet. 
Its value will be loaded as a DataView row in this test. - // It will be the input of AlienFusionProcess.MergeBody(AlienHero, SuperAlienHero). + // A custom class with a type which ML.NET doesn't know yet. Its value will + // be loaded as a DataView row in this test. It will be the input of + // AlienFusionProcess.MergeBody(AlienHero, SuperAlienHero). // - // The members One> and Two" would be mapped to different types inside ML.NET type system because they - // have different AlienTypeAttributeAttribute's. For example, the column type of One would be DataViewAlienBodyType + // The members One> and Two" would be mapped to different types inside + // ML.NET type system because they have different + // AlienTypeAttributeAttribute's. For example, the column type of One would + // be DataViewAlienBodyType // with RaceId=100. // private class AlienHero @@ -110,11 +126,13 @@ public AlienHero() public AlienHero(string name, int age, float height, float weight, int handCount, - int anotherAge, float anotherHeight, float anotherWeight, int anotherHandCount) + int anotherAge, float anotherHeight, float anotherWeight, int + anotherHandCount) { Name = "Unknown"; One = new AlienBody(age, height, weight, handCount); - Two = new AlienBody(anotherAge, anotherHeight, anotherWeight, anotherHandCount); + Two = new AlienBody(anotherAge, anotherHeight, anotherWeight, + anotherHandCount); } } @@ -142,7 +160,8 @@ public override int GetHashCode() } } - // The output type of processing AlienHero using AlienFusionProcess.MergeBody(AlienHero, SuperAlienHero). + // The output type of processing AlienHero using AlienFusionProcess + // .MergeBody(AlienHero, SuperAlienHero). private class SuperAlienHero { public string Name { get; set; } @@ -157,7 +176,8 @@ public SuperAlienHero() } } - // The implementation of custom mapping is MergeBody. It accepts AlienHero and produces SuperAlienHero. + // The implementation of custom mapping is MergeBody. 
It accepts AlienHero + // and produces SuperAlienHero. private class AlienFusionProcess { public static void MergeBody(AlienHero input, SuperAlienHero output) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs index 17188bd928..110aa3b2ef 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs @@ -8,19 +8,30 @@ public static class DropColumns { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new InputData(){ Age = 21, Gender = "Male", Education = "BS", ExtraColumn = 1 }, - new InputData(){ Age = 23, Gender = "Female", Education = "MBA", ExtraColumn = 2 }, - new InputData(){ Age = 28, Gender = "Male", Education = "PhD", ExtraColumn = 3 }, - new InputData(){ Age = 22, Gender = "Male", Education = "BS", ExtraColumn = 4 }, - new InputData(){ Age = 23, Gender = "Female", Education = "MS", ExtraColumn = 5 }, - new InputData(){ Age = 27, Gender = "Female", Education = "PhD", ExtraColumn = 6 }, + new InputData(){ Age = 21, Gender = "Male", Education = "BS", + ExtraColumn = 1 }, + + new InputData(){ Age = 23, Gender = "Female", Education = "MBA", + ExtraColumn = 2 }, + + new InputData(){ Age = 28, Gender = "Male", Education = "PhD", + ExtraColumn = 3 }, + + new InputData(){ Age = 22, Gender = "Male", Education = "BS", + ExtraColumn = 4 }, + + new InputData(){ Age = 23, Gender = "Female", Education = "MS", + ExtraColumn = 5 }, + + new InputData(){ Age = 27, Gender = "Female", Education = "PhD", + 
ExtraColumn = 6 }, }; // Convert training data to IDataView. @@ -30,31 +41,40 @@ public static void Example() var pipeline = mlContext.Transforms.DropColumns("ExtraColumn"); // Now we can transform the data and look at the output. - // Don't forget that this operation doesn't actually operate on data until we perform an action that requires + // Don't forget that this operation doesn't actually operate on data + // until we perform an action that requires // the data to be materialized. var transformedData = pipeline.Fit(dataview).Transform(dataview); // Now let's take a look at what the DropColumns operations did. - // We can extract the transformed data as an IEnumerable of InputData, the class we define below. - // When we try to pull out the Age, Gender, Education and ExtraColumn columns, ML.NET will raise an exception on the ExtraColumn + // We can extract the transformed data as an IEnumerable of InputData, + // the class we define below. When we try to pull out the Age, Gender, + // Education and ExtraColumn columns, ML.NET will raise an exception on + // the ExtraColumn try { - var failingRowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + var failingRowEnumerable = mlContext.Data.CreateEnumerable< + InputData>(transformedData, reuseRowObject: false); } catch (ArgumentOutOfRangeException exception) { - Console.WriteLine($"ExtraColumn is not available, so an exception is thrown: {exception.Message}."); + Console.WriteLine($"ExtraColumn is not available, so an exception" + + $" is thrown: {exception.Message}."); } // Expected output: // ExtraColumn is not available, so an exception is thrown: Could not find column 'ExtraColumn'. // Parameter name: Schema - // And we can write a few columns out to see that the rest of the data is still available. 
- var rowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // And we can write a few columns out to see that the rest of the data + // is still available. + var rowEnumerable = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + Console.WriteLine($"The columns we didn't drop are still available."); foreach (var row in rowEnumerable) - Console.WriteLine($"Age: {row.Age} Gender: {row.Gender} Education: {row.Education}"); + Console.WriteLine($"Age: {row.Age} Gender: {row.Gender} " + + $"Education: {row.Education}"); // Expected output: // The columns we didn't drop are still available. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs index 7e880b90d5..b4eb2bf21e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs @@ -10,8 +10,8 @@ public static class IndicateMissingValues { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -23,21 +23,29 @@ public static void Example() }; var data = mlContext.Data.LoadFromEnumerable(samples); - // IndicateMissingValues is used to create a boolean containing 'true' where the value in the - // input column is missing. For floats and doubles, missing values are represented as NaN. 
- var pipeline = mlContext.Transforms.IndicateMissingValues("MissingIndicator", "Features"); + // IndicateMissingValues is used to create a boolean containing 'true' + // where the value in the input column is missing. For floats and + // doubles, missing values are represented as NaN. + var pipeline = mlContext.Transforms.IndicateMissingValues( + "MissingIndicator", "Features"); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var tansformer = pipeline.Fit(data); var transformedData = tansformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var rowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. + var rowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(transformedData, reuseRowObject: false); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. 
foreach (var row in rowEnumerable) - Console.WriteLine($"Features: [{string.Join(", ", row.Features)}]\t MissingIndicator: [{string.Join(", ", row.MissingIndicator)}]"); + Console.WriteLine("Features: [" + string.Join(", ", row.Features) + + "]\t MissingIndicator: [" + string.Join(", ", row + .MissingIndicator) + "]"); // Expected output: // Features: [1, 1, 0] MissingIndicator: [False, False, False] diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs index 830fb9d047..38750ed0a6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs @@ -9,40 +9,53 @@ public static class IndicateMissingValuesMultiColumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. 
var samples = new List() { - new DataPoint(){ Features1 = new float[3] {1, 1, 0}, Features2 = new float[2] {1, 1} }, - new DataPoint(){ Features1 = new float[3] {0, float.NaN, 1}, Features2 = new float[2] {float.NaN, 1} }, - new DataPoint(){ Features1 = new float[3] {-1, float.NaN, -3}, Features2 = new float[2] {1, float.PositiveInfinity} }, + new DataPoint(){ Features1 = new float[3] {1, 1, 0}, Features2 = + new float[2] {1, 1} }, + + new DataPoint(){ Features1 = new float[3] {0, float.NaN, 1}, + Features2 = new float[2] {float.NaN, 1} }, + + new DataPoint(){ Features1 = new float[3] {-1, float.NaN, -3}, + Features2 = new float[2] {1, float.PositiveInfinity} }, }; var data = mlContext.Data.LoadFromEnumerable(samples); - // IndicateMissingValues is used to create a boolean containing 'true' where the value in the - // input column is missing. For floats and doubles, missing values are NaN. - // We can use an array of InputOutputColumnPair to apply the MissingValueIndicatorEstimator + // IndicateMissingValues is used to create a boolean containing 'true' + // where the value in the input column is missing. For floats and + // doubles, missing values are NaN. We can use an array of + // InputOutputColumnPair to apply the MissingValueIndicatorEstimator // to multiple columns in one pass over the data. var pipeline = mlContext.Transforms.IndicateMissingValues(new[] { new InputOutputColumnPair("MissingIndicator1", "Features1"), new InputOutputColumnPair("MissingIndicator2", "Features2") }); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var tansformer = pipeline.Fit(data); var transformedData = tansformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var rowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. + var rowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(transformedData, reuseRowObject: false); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. foreach (var row in rowEnumerable) - Console.WriteLine($"Features1: [{string.Join(", ", row.Features1)}]\t MissingIndicator1: [{string.Join(", ", row.MissingIndicator1)}]\t " + - $"Features2: [{string.Join(", ", row.Features2)}]\t MissingIndicator2: [{string.Join(", ", row.MissingIndicator2)}]"); + Console.WriteLine("Features1: [" + string.Join(", ", row + .Features1) + "]\t MissingIndicator1: [" + string.Join(", ", + row.MissingIndicator1) + "]\t Features2: [" + string.Join(", ", + row.Features2) + "]\t MissingIndicator2: [" + string.Join(", ", + row.MissingIndicator2) + "]"); // Expected output: // Features1: [1, 1, 0] MissingIndicator1: [False, False, False] Features2: [1, 1] MissingIndicator2: [False, False] diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index ba23d87e2f..27315443c7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -12,7 +12,8 @@ public class NormalizeBinning { public static void Example() { - // Create a new ML context, for ML.NET operations. 
It can be used for exception tracking and logging, + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, // as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() @@ -22,60 +23,91 @@ public static void Example() new DataPoint(){ Features = new float[4] { 4, 0, 1, 0} }, new DataPoint(){ Features = new float[4] { 2,-1,-1, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeBinning normalizes the data by constructing equidensity bins and produce output based on + // NormalizeBinning normalizes the data by constructing equidensity bins + // and produce output based on // to which bin the original value belongs. - var normalize = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: false); + var normalize = mlContext.Transforms.NormalizeBinning("Features", + maximumBinCount: 4, fixZero: false); - // NormalizeBinning normalizes the data by constructing equidensity bins and produce output based on - // to which bin original value belong but make sure zero values would remain zero after normalization. - // Helps preserve sparsity. - var normalizeFixZero = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: true); + // NormalizeBinning normalizes the data by constructing equidensity bins + // and produce output based on to which bin original value belong but + // make sure zero values would remain zero after normalization. Helps + // preserve sparsity. + var normalizeFixZero = mlContext.Transforms.NormalizeBinning("Features", + maximumBinCount: 4, fixZero: true); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. 
+ // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var normalizeFixZeroTransform = normalizeFixZero.Fit(data); var fixZeroData = normalizeFixZeroTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 1.0000, 0.6667, 1.0000, 0.0000 // 0.6667, 1.0000, 0.6667, 0.0000 // 0.3333, 0.3333, 0.3333, 0.0000 // 0.0000, 0.0000, 0.0000, 1.0000 - var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + var columnFixZero = fixZeroData.GetColumn("Features") + .ToArray(); + foreach (var row in columnFixZero) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 1.0000, 0.3333, 1.0000, 0.0000 // 0.6667, 0.6667, 0.6667, 0.0000 // 0.3333, 0.0000, 0.3333, 0.0000 // 0.0000, -0.3333, 0.0000, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. - var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; + // Let's get transformation parameters. Since we work with only one + // column we need to pass 0 as parameter for + // GetNormalizerModelParameters. If we have multiple columns + // transformations we need to pass index of InputOutputColumnPair. 
+ var transformParams = normalizeTransform.GetNormalizerModelParameters(0) + as BinNormalizerModelParameters>; + var density = transformParams.Density[0]; - var offset = (transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0]); - Console.WriteLine($"The 0-index value in resulting array would be produce by: y = (Index(x) / {density}) - {offset}"); - Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); - Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}"); + var offset = (transformParams.Offset.Length == 0 ? 0 : transformParams + .Offset[0]); + + Console.WriteLine($"The 0-index value in resulting array would be " + + $"produce by: y = (Index(x) / {density}) - {offset}"); + + Console.WriteLine("Where Index(x) is the index of the bin to which " + + "x belongs"); + + Console.WriteLine("Bins upper bounds are: " + string.Join(" ", + transformParams.UpperBounds[0])); // Expected output: // The 0-index value in resulting array would be produce by: y = (Index(x) / 3) - 0 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 3 5 7 ∞ - var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>); + var fixZeroParams = (normalizeFixZeroTransform + .GetNormalizerModelParameters(0) as BinNormalizerModelParameters< + ImmutableArray>); + density = fixZeroParams.Density[1]; - offset = (fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1]); - Console.WriteLine($"The 0-index value in resulting array would be produce by: y = (Index(x) / {density}) - {offset}"); - Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); - Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + offset = (fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams + .Offset[1]); + + Console.WriteLine($"The 0-index value in resulting array would be " + + $"produce by: y = (Index(x) / {density}) - {offset}"); + + Console.WriteLine("Where Index(x) is the index of the bin to which x " + + "belongs"); + + Console.WriteLine("Bins upper bounds are: " + string.Join(" ", + fixZeroParams.UpperBounds[1])); // Expected output: // The 0-index value in resulting array would be produce by: y = (Index(x) / 3) - 0.3333333 // Where Index(x) is the index of the bin to which x belongs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinningMulticolumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinningMulticolumn.cs index 90b9360aa4..9d6ef29ed7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinningMulticolumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinningMulticolumn.cs @@ -12,35 +12,45 @@ public class NormalizeBinningMulticolumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); var samples = new List() { - new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, Features2 = 1 }, - new DataPoint(){ Features = new float[4] { 6, 2, 2, 0}, Features2 = 4 }, - new DataPoint(){ Features = new float[4] { 4, 0, 1, 0}, Features2 = 1 }, - new DataPoint(){ Features = new float[4] { 2,-1,-1, 1}, Features2 = 2 } + new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, + Features2 = 1 }, + + new DataPoint(){ Features = new float[4] { 6, 2, 2, 0}, + Features2 = 4 }, + + new DataPoint(){ Features = new float[4] { 4, 0, 1, 0}, + Features2 = 1 }, + + new DataPoint(){ Features = new float[4] { 2,-1,-1, 1}, + Features2 = 2 } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeBinning normalizes the data by constructing equidensity bins and produce output based on - // to which bin the original value belongs. + // NormalizeBinning normalizes the data by constructing equidensity bins + // and produce output based on to which bin the original value belongs. var normalize = mlContext.Transforms.NormalizeBinning(new[]{ new InputOutputColumnPair("Features"), new InputOutputColumnPair("Features2"), }, maximumBinCount: 4, fixZero: false); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); var column2 = transformedData.GetColumn("Features2").ToArray(); for(int i=0; i< column.Length; i++) - Console.WriteLine(string.Join(", ", column[i].Select(x => x.ToString("f4")))+"\t\t"+column2[i]); + Console.WriteLine(string.Join(", ", column[i].Select(x => x + .ToString("f4")))+"\t\t"+column2[i]); // Expected output: // // Features Feature2 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index 322997ca3b..8d4795a1b6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -10,8 +10,8 @@ class NormalizeGlobalContrast { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() { @@ -20,18 +20,23 @@ public static void Example() new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} }, new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. 
var data = mlContext.Data.LoadFromEnumerable(samples); - var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); + var approximation = mlContext.Transforms.NormalizeGlobalContrast( + "Features", ensureZeroMean: false, scale:2, + ensureUnitStandardDeviation:true); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var tansformer = approximation.Fit(data); var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 2.0000, 2.0000,-2.0000,-2.0000 // 2.0000, 2.0000,-2.0000,-2.0000 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index 37d302c022..3ef43d9f85 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -12,8 +12,8 @@ public class NormalizeLogMeanVariance { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); var samples = new List() { @@ -22,24 +22,32 @@ public static void Example() new DataPoint(){ Features = new float[5] { 0, 0, 1, 0, 0} }, new DataPoint(){ Features = new float[5] {-1,-1,-1, 1, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data. + // NormalizeLogMeanVariance normalizes the data based on the computed + // mean and variance of the logarithm of the data. // Uses Cumulative distribution function as output. - var normalize = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: true); + var normalize = mlContext.Transforms.NormalizeLogMeanVariance( + "Features", useCdf: true); - // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data. - var normalizeNoCdf = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: false); + // NormalizeLogMeanVariance normalizes the data based on the computed + // mean and variance of the logarithm of the data. + var normalizeNoCdf = mlContext.Transforms.NormalizeLogMeanVariance( + "Features", useCdf: false); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data + // below. 
var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var normalizeNoCdfTransform = normalizeNoCdf.Fit(data); var noCdfData = normalizeNoCdfTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.1587, 0.1587, 0.8654, 0.0000, 0.8413 // 0.8413, 0.8413, 0.5837, 0.0000, 0.0000 @@ -48,27 +56,38 @@ public static void Example() var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 1.8854, 1.8854, 5.2970, 0.0000, 7670682000000000000000000000000000000.0000 // 4.7708, 4.7708, 3.0925, 0.0000, -7670682000000000000000000000000000000.0000 // -1.0000,-1.0000, 0.8879, 0.0000, -1.0000 // -3.8854,-3.8854,-3.5213, 0.0000, -0.9775 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. - var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; - Console.WriteLine("The 1-index value in resulting array would be produce by:"); - Console.WriteLine($"y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + // Let's get transformation parameters. Since we work with only one + // column we need to pass 0 as parameter for + // GetNormalizerModelParameters. If we have multiple columns + // transformations we need to pass index of InputOutputColumnPair. 
+ var transformParams = normalizeTransform.GetNormalizerModelParameters(0) + as CdfNormalizerModelParameters>; + + Console.WriteLine("The 1-index value in resulting array would be " + + "produce by:"); + + Console.WriteLine("y = 0.5* (1 + ERF((Math.Log(x)- " + transformParams + .Mean[1] + ") / (" + transformParams.StandardDeviation[1] + + " * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: // The 1-index value in resulting array would be produce by: - // y = 0.5 * (1 + ERF((Math.Log(x) - 0.3465736) / (0.3465736 * sqrt(2))) - var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; + // y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) + var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters( + 0) as AffineNormalizerModelParameters>; var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1]; var scale = noCdfParams.Scale[1]; - Console.WriteLine($"The 1-index value in resulting array would be produce by: y = (x - ({offset})) * {scale}"); + Console.WriteLine($"The 1-index value in resulting array would be " + + $"produce by: y = (x - ({offset})) * {scale}"); // Expected output: // The 1-index value in resulting array would be produce by: y = (x - (0.3465736)) * 2.88539 } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index 83883ae49e..c6a020b850 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -11,8 +11,8 @@ class NormalizeLpNorm { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. 
It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() { @@ -21,18 +21,23 @@ public static void Example() new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} }, new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); + var approximation = mlContext.Transforms.NormalizeLpNorm("Features", + norm: LpNormNormalizingEstimatorBase.NormFunction.L1, + ensureZeroMean: true); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var tansformer = approximation.Fit(data); var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.2500, 0.2500, -0.2500, -0.2500 // 0.2500, 0.2500, -0.2500, -0.2500 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index ad35d43e6f..cccbb58f1b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -12,8 +12,8 @@ public class NormalizeMeanVariance { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() { @@ -22,24 +22,31 @@ public static void Example() new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} }, new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data. - // Uses Cumulative distribution function as output. - var normalize = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: true); + // NormalizeMeanVariance normalizes the data based on the computed mean + // and variance of the data. 
Uses Cumulative distribution function as + // output. + var normalize = mlContext.Transforms.NormalizeMeanVariance("Features", + useCdf: true); - // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data. - var normalizeNoCdf = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: false); + // NormalizeMeanVariance normalizes the data based on the computed mean + // and variance of the data. + var normalizeNoCdf = mlContext.Transforms.NormalizeMeanVariance( + "Features", useCdf: false); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var normalizeNoCdfTransform = normalizeNoCdf.Fit(data); var noCdfData = normalizeNoCdfTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.6726, 0.6726, 0.8816, 0.2819 // 0.9101, 0.9101, 0.6939, 0.2819 @@ -49,27 +56,40 @@ public static void Example() var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.8165, 0.8165, 1.5492, 0.0000 // 1.6330, 1.6330, 1.0328, 0.0000 // 0.0000, 0.0000, 0.5164, 0.0000 // -0.8165,-0.8165,-0.5164, 2.0000 - // Let's get transformation parameters. 
Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. - var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; - Console.WriteLine($"The 1-index value in resulting array would be produce by:"); - Console.WriteLine($" y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + // Let's get transformation parameters. Since we work with only one + // column we need to pass 0 as parameter for + // GetNormalizerModelParameters. If we have multiple columns + // transformations we need to pass index of InputOutputColumnPair. + var transformParams = normalizeTransform + .GetNormalizerModelParameters(0) as CdfNormalizerModelParameters< + ImmutableArray>; + + Console.WriteLine($"The 1-index value in resulting array would " + + $"be produce by:"); + + Console.WriteLine(" y = 0.5* (1 + ERF((x- " + transformParams.Mean[1] + + ") / (" + transformParams.StandardDeviation[1] + " * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: // The 1-index value in resulting array would be produce by: // y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) - var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; + var noCdfParams = normalizeNoCdfTransform + .GetNormalizerModelParameters(0) as + AffineNormalizerModelParameters>; + var offset = noCdfParams.Offset.Length == 0 ? 
0 : noCdfParams.Offset[1]; var scale = noCdfParams.Scale[1]; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({offset})) * {scale}"); + Console.WriteLine($"Values for slot 1 would be transfromed by " + + $"applying y = (x - ({offset})) * {scale}"); // Expected output: // The 1-index value in resulting array would be produce by: y = (x - (0)) * 0.8164966 } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index 7b7a60d74e..f78056f9e6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -12,8 +12,8 @@ public class NormalizeMinMax { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() { @@ -22,46 +22,63 @@ public static void Example() new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} }, new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeMinMax normalize rows by finding min and max values in each row slot - // and setting projection of min value to 0 and max to 1 and everything else to - // values in between. 
- var normalize = mlContext.Transforms.NormalizeMinMax("Features", fixZero: false); + // NormalizeMinMax normalize rows by finding min and max values in each + // row slot and setting projection of min value to 0 and max to 1 and + // everything else to values in between. + var normalize = mlContext.Transforms.NormalizeMinMax("Features", + fixZero: false); - // Normalize rows by finding min and max values in each row slot, but make sure - // zero values would remain zero after normalization. Helps preserve sparsity. - var normalizeFixZero = mlContext.Transforms.NormalizeMinMax("Features", fixZero: true); + // Normalize rows by finding min and max values in each row slot, but + // make sure zero values would remain zero after normalization. Helps + // preserve sparsity. + var normalizeFixZero = mlContext.Transforms.NormalizeMinMax("Features", + fixZero: true); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var normalizeFixZeroTransform = normalizeFixZero.Fit(data); var fixZeroData = normalizeFixZeroTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.6667, 0.6667, 1.0000, 0.0000 // 1.0000, 1.0000, 0.7500, 0.0000 // 0.3333, 0.3333, 0.5000, 0.0000 // 0.0000, 0.0000, 0.0000, 1.0000 - var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + var columnFixZero = fixZeroData.GetColumn("Features") + .ToArray(); + foreach (var row in columnFixZero) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 0.5000, 0.5000, 1.0000, 0.0000 // 1.0000, 1.0000, 0.6667, 0.0000 // 0.0000, 0.0000, 0.3333, 0.0000 // -0.5000,-0.5000,-0.3333, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. - var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; - Console.WriteLine($"The 1-index value in resulting array would be produce by:"); - Console.WriteLine($" y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); + // Let's get transformation parameters. Since we work with only one + // column we need to pass 0 as parameter for + // GetNormalizerModelParameters. If we have multiple columns + // transformations we need to pass index of InputOutputColumnPair. 
+ var transformParams = normalizeTransform.GetNormalizerModelParameters(0) + as AffineNormalizerModelParameters>; + + Console.WriteLine($"The 1-index value in resulting array would be " + + $"produce by:"); + + Console.WriteLine(" y = (x - (" + (transformParams.Offset.Length == 0 ? + 0 : transformParams.Offset[1]) + ")) * " + transformParams + .Scale[1]); // Expected output: // The 1-index value in resulting array would be produce by: // y = (x - (-1)) * 0.3333333 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index 63fde50a9e..9441324639 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -12,39 +12,58 @@ public class NormalizeSupervisedBinning { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); var samples = new List() { - new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, Bin="Bin1" }, - new DataPoint(){ Features = new float[4] { 6, 2, 2, 1}, Bin="Bin2" }, - new DataPoint(){ Features = new float[4] { 5, 3, 0, 2}, Bin="Bin2" }, - new DataPoint(){ Features = new float[4] { 4,-8, 1, 3}, Bin="Bin3" }, - new DataPoint(){ Features = new float[4] { 2,-5,-1, 4}, Bin="Bin3" } + new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, + Bin ="Bin1" }, + + new DataPoint(){ Features = new float[4] { 6, 2, 2, 1}, + Bin ="Bin2" }, + + new DataPoint(){ Features = new float[4] { 5, 3, 0, 2}, + Bin ="Bin2" }, + + new DataPoint(){ Features = new float[4] { 4,-8, 1, 3}, + Bin ="Bin3" }, + + new DataPoint(){ Features = new float[4] { 2,-5,-1, 4}, + Bin ="Bin3" } }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); // Let's transform "Bin" column from string to key. - data = mlContext.Transforms.Conversion.MapValueToKey("Bin").Fit(data).Transform(data); - // NormalizeSupervisedBinning normalizes the data by constructing bins based on correlation with the label column and produce output based on - // to which bin original value belong. - var normalize = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: false); - - // NormalizeSupervisedBinning normalizes the data by constructing bins based on correlation with the label column and produce output based on - // to which bin original value belong but make sure zero values would remain zero after normalization. - // Helps preserve sparsity. 
- var normalizeFixZero = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: true); - - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + data = mlContext.Transforms.Conversion.MapValueToKey("Bin").Fit(data) + .Transform(data); + // NormalizeSupervisedBinning normalizes the data by constructing bins + // based on correlation with the label column and produce output based + // on to which bin original value belong. + var normalize = mlContext.Transforms.NormalizeSupervisedBinning( + "Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, + fixZero: false); + + // NormalizeSupervisedBinning normalizes the data by constructing bins + // based on correlation with the label column and produce output based + // on to which bin original value belong but make sure zero values would + // remain zero after normalization. Helps preserve sparsity. + var normalizeFixZero = mlContext.Transforms.NormalizeSupervisedBinning( + "Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, + fixZero: true); + + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var normalizeTransform = normalize.Fit(data); var transformedData = normalizeTransform.Transform(data); var normalizeFixZeroTransform = normalizeFixZero.Fit(data); var fixZeroData = normalizeFixZeroTransform.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 1.0000, 0.5000, 1.0000, 0.0000 // 0.5000, 1.0000, 0.0000, 0.5000 @@ -52,9 +71,12 @@ public static void Example() // 0.0000, 0.0000, 0.0000, 1.0000 // 0.0000, 0.0000, 0.0000, 1.0000 - var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + var columnFixZero = fixZeroData.GetColumn("Features") + .ToArray(); + foreach (var row in columnFixZero) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString( + "f4")))); // Expected output: // 1.0000, 0.0000, 1.0000, 0.0000 // 0.5000, 0.5000, 0.0000, 0.5000 @@ -62,24 +84,48 @@ public static void Example() // 0.0000,-0.5000, 0.0000, 1.0000 // 0.0000,-0.5000, 0.0000, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. - var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; - Console.WriteLine($"The 1-index value in resulting array would be produce by:"); - Console.WriteLine($"y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0])}"); - Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); - Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); + // Let's get transformation parameters. Since we work with only one + // column we need to pass 0 as parameter for + // GetNormalizerModelParameters. + // If we have multiple columns transformations we need to pass index of + // InputOutputColumnPair. + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) + as BinNormalizerModelParameters>; + + Console.WriteLine($"The 1-index value in resulting array would be " + + $"produce by:"); + + Console.WriteLine("y = (Index(x) / " + transformParams.Density[0] + + ") - " + (transformParams.Offset.Length == 0 ? 0 : transformParams + .Offset[0])); + + Console.WriteLine("Where Index(x) is the index of the bin to which " + + "x belongs"); + + Console.WriteLine("Bins upper borders are: " + string.Join(" ", + transformParams.UpperBounds[0])); // Expected output: // The 1-index value in resulting array would be produce by: // y = (Index(x) / 2) - 0 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 4.5 7 ∞ - var fixZeroParams = normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; - Console.WriteLine($"The 1-index value in resulting array would be produce by:"); - Console.WriteLine($" y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); - Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); - Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + var fixZeroParams = normalizeFixZeroTransform + .GetNormalizerModelParameters(0) as BinNormalizerModelParameters< + ImmutableArray>; + + Console.WriteLine($"The 1-index value in resulting array would be " + + $"produce by:"); + + Console.WriteLine(" y = (Index(x) / " + fixZeroParams.Density[1] + + ") - " + (fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams + .Offset[1])); + + Console.WriteLine("Where Index(x) is the index of the bin to which x " + + "belongs"); + + Console.WriteLine("Bins upper borders are: " + string.Join(" ", + fixZeroParams.UpperBounds[1])); // Expected output: // The 1-index value in resulting array would be produce by: // y = (Index(x) / 2) - 0.5 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs index d30c6812ce..83ba740a0d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs @@ -11,35 +11,46 @@ class ReplaceMissingValues { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. 
var samples = new List() { - new DataPoint(){ Features = new float[3] {float.PositiveInfinity, 1, 0} }, + new DataPoint(){ Features = new float[3] {float.PositiveInfinity, 1, + 0 } }, + new DataPoint(){ Features = new float[3] {0, float.NaN, 1} }, new DataPoint(){ Features = new float[3] {-1, 2, -3} }, new DataPoint(){ Features = new float[3] {-1, float.NaN, -3} }, }; var data = mlContext.Data.LoadFromEnumerable(samples); - // Here we use the default replacement mode, which replaces the value with the default value for its type. - var defaultPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", - MissingValueReplacingEstimator.ReplacementMode.DefaultValue); + // Here we use the default replacement mode, which replaces the value + // with the default value for its type. + var defaultPipeline = mlContext.Transforms.ReplaceMissingValues( + "MissingReplaced", "Features", MissingValueReplacingEstimator + .ReplacementMode.DefaultValue); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. var defaultTransformer = defaultPipeline.Fit(data); var defaultTransformedData = defaultTransformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var defaultRowEnumerable = mlContext.Data.CreateEnumerable(defaultTransformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. 
+ var defaultRowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(defaultTransformedData, reuseRowObject: + false); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. foreach (var row in defaultRowEnumerable) - Console.WriteLine($"Features: [{string.Join(", ", row.Features)}]\t MissingReplaced: [{string.Join(", ", row.MissingReplaced)}]"); + Console.WriteLine("Features: [" + string.Join(", ", row.Features) + + "]\t MissingReplaced: [" + string.Join(", ", row + .MissingReplaced) + "]"); // Expected output: // Features: [∞, 1, 0] MissingReplaced: [∞, 1, 0] @@ -47,21 +58,29 @@ public static void Example() // Features: [-1, 2, -3] MissingReplaced: [-1, 2, -3] // Features: [-1, NaN, -3] MissingReplaced: [-1, 0, -3] - // Here we use the mean replacement mode, which replaces the value with the mean of the non values that were not missing. - var meanPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", - MissingValueReplacingEstimator.ReplacementMode.Mean); + // Here we use the mean replacement mode, which replaces the value with + // the mean of the non values that were not missing. + var meanPipeline = mlContext.Transforms.ReplaceMissingValues( + "MissingReplaced", "Features", MissingValueReplacingEstimator + .ReplacementMode.Mean); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var meanTransformer = meanPipeline.Fit(data); var meanTransformedData = meanTransformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var meanRowEnumerable = mlContext.Data.CreateEnumerable(meanTransformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. + var meanRowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(meanTransformedData, reuseRowObject: false); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. foreach (var row in meanRowEnumerable) - Console.WriteLine($"Features: [{string.Join(", ", row.Features)}]\t MissingReplaced: [{string.Join(", ", row.MissingReplaced)}]"); + Console.WriteLine("Features: [" + string.Join(", ", row.Features) + + "]\t MissingReplaced: [" + string.Join(", ", row + .MissingReplaced) + "]"); // Expected output: // Features: [∞, 1, 0] MissingReplaced: [∞, 1, 0] diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesMultiColumn.cs index aa5d1acf5b..2f17e5334b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesMultiColumn.cs @@ -10,39 +10,55 @@ class ReplaceMissingValuesMultiColumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. var samples = new List() { - new DataPoint(){ Features1 = new float[3] {1, 1, 0}, Features2 = new float[2] {1, 1} }, - new DataPoint(){ Features1 = new float[3] {0, float.NaN, 1}, Features2 = new float[2] {0, 1} }, - new DataPoint(){ Features1 = new float[3] {-1, float.NaN, -3}, Features2 = new float[2] {-1, float.NaN} }, - new DataPoint(){ Features1 = new float[3] {-1, 6, -3}, Features2 = new float[2] {0, float.PositiveInfinity} }, + new DataPoint(){ Features1 = new float[3] {1, 1, 0}, Features2 = + new float[2] {1, 1} }, + + new DataPoint(){ Features1 = new float[3] {0, float.NaN, 1}, + Features2 = new float[2] {0, 1} }, + + new DataPoint(){ Features1 = new float[3] {-1, float.NaN, -3}, + Features2 = new float[2] {-1, float.NaN} }, + + new DataPoint(){ Features1 = new float[3] {-1, 6, -3}, Features2 = + new float[2] {0, float.PositiveInfinity} }, }; var data = mlContext.Data.LoadFromEnumerable(samples); - // Here we use the default replacement mode, which replaces the value with the default value for its type. + // Here we use the default replacement mode, which replaces the value + // with the default value for its type. var defaultPipeline = mlContext.Transforms.ReplaceMissingValues(new[] { new InputOutputColumnPair("MissingReplaced1", "Features1"), new InputOutputColumnPair("MissingReplaced2", "Features2") }, MissingValueReplacingEstimator.ReplacementMode.DefaultValue); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. This operation doesn't actually evaluate + // data until we read the data below. 
var defaultTransformer = defaultPipeline.Fit(data); var defaultTransformedData = defaultTransformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var defaultRowEnumerable = mlContext.Data.CreateEnumerable(defaultTransformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. + var defaultRowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(defaultTransformedData, reuseRowObject: + false); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. foreach (var row in defaultRowEnumerable) - Console.WriteLine($"Features1: [{string.Join(", ", row.Features1)}]\t MissingReplaced1: [{string.Join(", ", row.MissingReplaced1)}]\t " + - $"Features2: [{ string.Join(", ", row.Features2)}]\t MissingReplaced2: [{string.Join(", ", row.MissingReplaced2)}]"); + Console.WriteLine("Features1: [" + string.Join(", ", row + .Features1) + "]\t MissingReplaced1: [" + string.Join(", ", row + .MissingReplaced1) + "]\t Features2: [" + string.Join(", ", row + .Features2) + "]\t MissingReplaced2: [" + string.Join(", ", row + .MissingReplaced2) + "]"); // Expected output: // Features1: [1, 1, 0] MissingReplaced1: [1, 1, 0] Features2: [1, 1] MissingReplaced2: [1, 1] @@ -50,25 +66,34 @@ public static void Example() // Features1: [-1, NaN, -3] MissingReplaced1: [-1, 0, -3] Features2: [-1, NaN] MissingReplaced2: [-1, 0] // Features1: [-1, 6, -3] MissingReplaced1: [-1, 6, -3] Features2: [0, ∞] MissingReplaced2: [0, ∞] - // Here we use the mean replacement mode, which replaces the value with the mean of the non values that were not missing. 
+ // Here we use the mean replacement mode, which replaces the value with + // the mean of the values that were not missing. var meanPipeline = mlContext.Transforms.ReplaceMissingValues(new[] { new InputOutputColumnPair("MissingReplaced1", "Features1"), new InputOutputColumnPair("MissingReplaced2", "Features2") }, MissingValueReplacingEstimator.ReplacementMode.Mean); - // Now we can transform the data and look at the output to confirm the behavior of the estimator. - // This operation doesn't actually evaluate data until we read the data below. + // Now we can transform the data and look at the output to confirm the + // behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data + // below. var meanTransformer = meanPipeline.Fit(data); var meanTransformedData = meanTransformer.Transform(data); - // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below. - var meanRowEnumerable = mlContext.Data.CreateEnumerable(meanTransformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // SampleDataTransformed, the class we define below. + var meanRowEnumerable = mlContext.Data.CreateEnumerable< + SampleDataTransformed>(meanTransformedData, reuseRowObject: false); - // And finally, we can write out the rows of the dataset, looking at the + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. 
foreach (var row in meanRowEnumerable) - Console.WriteLine($"Features1: [{string.Join(", ", row.Features1)}]\t MissingReplaced1: [{string.Join(", ", row.MissingReplaced1)}]\t " + - $"Features2: [{ string.Join(", ", row.Features2)}]\t MissingReplaced2: [{string.Join(", ", row.MissingReplaced2)}]"); + Console.WriteLine("Features1: [" + string.Join(", ", row + .Features1) + "]\t MissingReplaced1: [" + string.Join(", ", row + .MissingReplaced1) + "]\t Features2: [" + string.Join(", ", row + .Features2) + "]\t MissingReplaced2: [" + string.Join(", ", row + .MissingReplaced2) + "]"); // Expected output: // Features1: [1, 1, 0] MissingReplaced1: [1, 1, 0] Features2: [1, 1] MissingReplaced2: [1, 1] diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs index 796617d58f..22efcdc085 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs @@ -8,19 +8,30 @@ public static class SelectColumns { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. 
var samples = new List() { - new InputData(){ Age = 21, Gender = "Male", Education = "BS", ExtraColumn = 1 }, - new InputData(){ Age = 23, Gender = "Female", Education = "MBA", ExtraColumn = 2 }, - new InputData(){ Age = 28, Gender = "Male", Education = "PhD", ExtraColumn = 3 }, - new InputData(){ Age = 22, Gender = "Male", Education = "BS", ExtraColumn = 4 }, - new InputData(){ Age = 23, Gender = "Female", Education = "MS", ExtraColumn = 5 }, - new InputData(){ Age = 27, Gender = "Female", Education = "PhD", ExtraColumn = 6 }, + new InputData(){ Age = 21, Gender = "Male", Education = "BS", + ExtraColumn = 1 }, + + new InputData(){ Age = 23, Gender = "Female", Education = "MBA", + ExtraColumn = 2 }, + + new InputData(){ Age = 28, Gender = "Male", Education = "PhD", + ExtraColumn = 3 }, + + new InputData(){ Age = 22, Gender = "Male", Education = "BS", + ExtraColumn = 4 }, + + new InputData(){ Age = 23, Gender = "Female", Education = "MS", + ExtraColumn = 5 }, + + new InputData(){ Age = 27, Gender = "Female", Education = "PhD", + ExtraColumn = 6 }, }; // Convert training data to IDataView. @@ -29,22 +40,29 @@ public static void Example() // Select a subset of columns to keep. var pipeline = mlContext.Transforms.SelectColumns("Age", "Education"); - // Now we can transform the data and look at the output to confirm the behavior of SelectColumns. - // Don't forget that this operation doesn't actually evaluate data until we read the data below, - // as transformations are lazy in ML.NET. + // Now we can transform the data and look at the output to confirm the + // behavior of SelectColumns. Don't forget that this operation doesn't + // actually evaluate data until we read the data below, as + // transformations are lazy in ML.NET. 
var transformedData = pipeline.Fit(dataview).Transform(dataview); // Print the number of columns in the schema - Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset."); + Console.WriteLine($"There are {transformedData.Schema.Count} columns" + + $" in the dataset."); // Expected output: // There are 2 columns in the dataset. - // We can extract the newly created column as an IEnumerable of TransformedData, the class we define below. - var rowEnumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of + // TransformedData, the class we define below. + var rowEnumerable = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + // And finally, we can write out the rows of the dataset, looking at the + // columns of interest. + Console.WriteLine($"Age and Educations columns obtained " + + $"post-transformation."); - // And finally, we can write out the rows of the dataset, looking at the columns of interest. - Console.WriteLine($"Age and Educations columns obtained post-transformation."); foreach (var row in rowEnumerable) Console.WriteLine($"Age: {row.Age} Education: {row.Education}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/BinaryClassificationFeaturization.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/BinaryClassificationFeaturization.ttinclude index ec5507c3a7..b223b29df4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/BinaryClassificationFeaturization.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/BinaryClassificationFeaturization.ttinclude @@ -13,21 +13,25 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of data points to be transformed. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); <# } #> @@ -39,14 +43,16 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. 
+ // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new <#=TrainerOptions#>; // Define the tree-based featurizer's configuration. var options = new <#=Options#>; // Define the featurizer. - var pipeline = mlContext.Transforms.<#=Trainer#>(options); + var pipeline = mlContext.Transforms.<#=Trainer#>( + options); // Train the model. var model = pipeline.Fit(dataView); @@ -54,24 +60,35 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization // Apply the trained transformer to the considered data set. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. 
for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join( + ",", dataPoint.Features) + "] is transformed to three " + + "different tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } <#=ExpectedOutput#> } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +99,16 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 3).Select(x => x ? randomFloat() : randomFloat() + <#=DataSepValue#>).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => x ? 
+ randomFloat() : randomFloat() + <#=DataSepValue#>).ToArray() }; } } - // Example with label and 3 feature values. A data set is a collection of such examples. + // Example with label and 3 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -103,7 +123,8 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.cs index 6c9b100fa4..9ca0b87a85 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastForestBinaryFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of data points to be transformed. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastForestBinaryTrainer.Options { // Create a simpler model by penalizing usage of new features. @@ -63,7 +68,8 @@ public static void Example() }; // Define the featurizer. - var pipeline = mlContext.Transforms.FeaturizeByFastForestBinary(options); + var pipeline = mlContext.Transforms.FeaturizeByFastForestBinary( + options); // Train the model. 
var model = pipeline.Fit(dataView); @@ -71,36 +77,50 @@ public static void Example() // Apply the trained transformer to the considered data set. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join( + ",", dataPoint.Features) + "] is transformed to three " + + "different tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors: + // The original feature vector 
[0.8173254,0.7680227,0.5581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1111111,0.8823529]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,0,1,0]. - // The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.5888848,0.9360271,0.4721779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4545455,0.8]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,1,1,1,0,1,0,1,1]. - // The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.2737045,0.2919063,0.4673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4545455,0.1111111]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,1,0,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,1,0,1,0,1,1]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -111,13 +131,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 3).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => x ? + randomFloat() : randomFloat() + 0.03f).ToArray() }; } } - // Example with label and 3 feature values. 
A data set is a collection of such examples. + // Example with label and 3 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -132,7 +155,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.tt index a2640ea95e..e88e911a26 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestBinaryFeaturizationWithOptions.tt @@ -24,7 +24,8 @@ string TrainerOptions = @"FastForestBinaryTrainer.Options LabelColumnName = labelColumnName }"; -string Options = @"FastForestBinaryFeaturizationEstimator.Options +string Options = @"FastForestBinaryFeaturizationEstimator + .Options { InputColumnName = featureColumnName, TreesColumnName = treesColumnName, @@ -34,15 +35,18 @@ string Options = @"FastForestBinaryFeaturizationEstimator.Options }"; string ExpectedOutput = @"// Expected output: - // The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors: + // The original feature vector [0.8173254,0.7680227,0.5581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1111111,0.8823529]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0]. 
// Paths IDs' 0-1 representation: [1,1,1,1,1,1,0,1,0]. - // The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.5888848,0.9360271,0.4721779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4545455,0.8]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,1,1,1,0,1,0,1,1]. - // The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.2737045,0.2919063,0.4673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4545455,0.1111111]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,1,0,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,1,0,1,0,1,1]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.cs index ca6c5a27c8..cc107d35fd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastForestRegressionFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastForestRegressionTrainer.Options { // Only use 80% of features to reduce over-fitting. @@ -63,44 +68,60 @@ public static void Example() }; // Define the featurizer. 
- var pipeline = mlContext.Transforms.FeaturizeByFastForestRegression(options); + var pipeline = mlContext.Transforms.FeaturizeByFastForestRegression( + options); // Train the model. var model = pipeline.Fit(dataView); - // Create testing data. Use different random seed to make it different from training data. + // Create testing data. Use different random seed to make it different + // from training data. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + 
String + .Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector[1.543569, 1.494266, 1.284405] is transformed to three different tree - based feature vectors: + // The original feature vector[1.543569, 1.494266, 1.284405] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.7291142,0.7825329,0.8764582]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,1,1,1,0,0,1,1,1,0,1]. - // The original feature vector[0.764918, 1.11206, 0.648211] is transformed to three different tree - based feature vectors: + // The original feature vector[0.764918, 1.11206, 0.648211] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.3802337,0.584159,0.5648927]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,1,0,0]. - // The original feature vector[1.251254, 1.269456, 1.444864] is transformed to three different tree - based feature vectors: + // The original feature vector[1.251254, 1.269456, 1.444864] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.7591804,0.7825329,0.7443035]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,0,1,1,1,0,0,1,1,1,0,1]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -110,12 +131,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 3).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 3).Select(x => x + + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -130,7 +153,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.tt index 1d949629d4..a73542dacb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastForestRegressionFeaturizationWithOptions.tt @@ -32,15 +32,18 @@ string Options = @"FastForestRegressionFeaturizationEstimator.Options }"; string ExpectedOutput = @"// Expected output: - // The original feature vector[1.543569, 1.494266, 1.284405] is transformed to three different tree - based feature vectors: + // The original feature vector[1.543569, 1.494266, 1.284405] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.7291142,0.7825329,0.8764582]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,1,1,1,0,0,1,1,1,0,1]. 
- // The original feature vector[0.764918, 1.11206, 0.648211] is transformed to three different tree - based feature vectors: + // The original feature vector[0.764918, 1.11206, 0.648211] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.3802337,0.584159,0.5648927]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,1,0,0]. - // The original feature vector[1.251254, 1.269456, 1.444864] is transformed to three different tree - based feature vectors: + // The original feature vector[1.251254, 1.269456, 1.444864] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.7591804,0.7825329,0.7443035]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,0,1,1,1,0,0,1,1,1,0,1]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.cs index c8c52e1490..521e04e23b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastTreeBinaryFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of data points to be transformed. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastTreeBinaryTrainer.Options { // Use L2Norm for early stopping. @@ -65,7 +70,8 @@ public static void Example() }; // Define the featurizer. - var pipeline = mlContext.Transforms.FeaturizeByFastTreeBinary(options); + var pipeline = mlContext.Transforms.FeaturizeByFastTreeBinary( + options); // Train the model. 
var model = pipeline.Fit(dataView); @@ -73,36 +79,50 @@ public static void Example() // Apply the trained transformer to the considered data set. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join( + ",", dataPoint.Features) + "] is transformed to three " + + "different tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors: + // The original feature vector 
[0.8173254,0.7680227,0.5581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.5714286,0.4636412,0.535588]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1]. - // The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.5888848,0.9360271,0.4721779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2352941,-0.1382389,0.535588]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1]. - // The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.2737045,0.2919063,0.4673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2352941,-0.1382389,-0.2184284]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,0,0,0]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -113,13 +133,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 3).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => x ? 
+ randomFloat() : randomFloat() + 0.03f).ToArray() }; } } - // Example with label and 3 feature values. A data set is a collection of such examples. + // Example with label and 3 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -134,7 +157,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.tt index ec055986d5..857365347f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeBinaryFeaturizationWithOptions.tt @@ -42,15 +42,18 @@ string ExpectedOutputPerInstance= @"// Expected output: // Label: False, Prediction: False"; string ExpectedOutput = @"// Expected output: - // The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors: + // The original feature vector [0.8173254,0.7680227,0.5581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.5714286,0.4636412,0.535588]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1]. 
- // The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.5888848,0.9360271,0.4721779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2352941,-0.1382389,0.535588]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1]. - // The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.2737045,0.2919063,0.4673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2352941,-0.1382389,-0.2184284]. // Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,0,0,0]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.cs index 9e525643d3..e11df0eb70 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastTreeRankingFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastTreeRankingTrainer.Options { // Reduce the number of trees to 3. @@ -61,7 +66,8 @@ public static void Example() }; // Define the featurizer. 
- var pipeline = mlContext.Transforms.FeaturizeByFastTreeRanking(options); + var pipeline = mlContext.Transforms.FeaturizeByFastTreeRanking( + options); // Train the model. var model = pipeline.Fit(dataView); @@ -69,36 +75,50 @@ public static void Example() // Apply the trained transformer to the considered data set. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The 
original feature vector [1.117325,1.068023,0.8581612] is transformed to three different tree-based feature vectors: + // The original feature vector [1.117325,1.068023,0.8581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4095458,0.2061437,0.2364294]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]. - // The original feature vector [0.6588848,1.006027,0.5421779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.6588848,1.006027,0.5421779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2543825,-0.06570309,-0.1456212]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,0]. - // The original feature vector [0.6737045,0.6919063,0.8673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.6737045,0.6919063,0.8673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2543825,-0.06570309,0.01300209]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0, int groupSize = 10) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -110,13 +130,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se Label = (uint)label, GroupId = (uint)(i / groupSize), // Create random features that are correlated with the label. - // For data points with larger labels, the feature values are slightly increased by adding a constant. 
- Features = Enumerable.Repeat(label, 3).Select(x => randomFloat() + x * 0.1f).ToArray() + // For data points with larger labels, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => randomFloat() + + x * 0.1f).ToArray() }; } } - // Example with label, groupId, and 3 feature values. A data set is a collection of such examples. + // Example with label, groupId, and 3 feature values. A data set is a + // collection of such examples. private class DataPoint { [KeyType(5)] @@ -134,7 +157,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.tt index 8be69bf2df..5c1fc30d7d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRankingFeaturizationWithOptions.tt @@ -32,15 +32,18 @@ string Comments= @" // Microsoft.ML.FastTree."; string ExpectedOutput = @"// Expected output: - // The original feature vector [1.117325,1.068023,0.8581612] is transformed to three different tree-based feature vectors: + // The original feature vector [1.117325,1.068023,0.8581612] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.4095458,0.2061437,0.2364294]. // Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1]. 
// Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]. - // The original feature vector [0.6588848,1.006027,0.5421779] is transformed to three different tree-based feature vectors: + // The original feature vector [0.6588848,1.006027,0.5421779] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2543825,-0.06570309,-0.1456212]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,0]. - // The original feature vector [0.6737045,0.6919063,0.8673147] is transformed to three different tree-based feature vectors: + // The original feature vector [0.6737045,0.6919063,0.8673147] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.2543825,-0.06570309,0.01300209]. // Leave IDs' 0-1 representation: [0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0]. // Paths IDs' 0-1 representation: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.cs index c8660e8127..407bb2e22f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastTreeRegressionFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastTreeRegressionTrainer.Options { // Only use 80% of features to reduce over-fitting. @@ -63,44 +68,60 @@ public static void Example() }; // Define the featurizer. 
- var pipeline = mlContext.Transforms.FeaturizeByFastTreeRegression(options); + var pipeline = mlContext.Transforms.FeaturizeByFastTreeRegression( + options); // Train the model. var model = pipeline.Fit(dataView); - // Create testing data. Use different random seed to make it different from training data. + // Create testing data. Use different random seed to make it different + // from training data. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + 
.Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector [1.543569,1.494266,1.284405] is transformed to three different tree-based feature vectors: + // The original feature vector [1.543569,1.494266,1.284405] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1507567,0.1372715,0.1019326]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,1,0,0,1,1,1,1,0]. - // The original feature vector [0.764918,1.11206,0.648211] is transformed to three different tree-based feature vectors: + // The original feature vector [0.764918,1.11206,0.648211] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.07604675,0.08244576,0.03080027]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,0,0,0,1]. - // The original feature vector [1.251254,1.269456,1.444864] is transformed to three different tree-based feature vectors: + // The original feature vector [1.251254,1.269456,1.444864] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1507567,0.1090626,0.0731837]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,1,0,0,1,1,1,1,0]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -110,12 +131,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 3).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 3).Select(x => x + + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -130,7 +153,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.tt index e22153c900..c03bf4e197 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeRegressionFeaturizationWithOptions.tt @@ -32,15 +32,18 @@ string Options = @"FastTreeRegressionFeaturizationEstimator.Options }"; string ExpectedOutput = @"// Expected output: - // The original feature vector [1.543569,1.494266,1.284405] is transformed to three different tree-based feature vectors: + // The original feature vector [1.543569,1.494266,1.284405] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1507567,0.1372715,0.1019326]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,1,0,0,1,1,1,1,0]. 
- // The original feature vector [0.764918,1.11206,0.648211] is transformed to three different tree-based feature vectors: + // The original feature vector [0.764918,1.11206,0.648211] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.07604675,0.08244576,0.03080027]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,0,0,0,1]. - // The original feature vector [1.251254,1.269456,1.444864] is transformed to three different tree-based feature vectors: + // The original feature vector [1.251254,1.269456,1.444864] is + // transformed to three different tree-based feature vectors: // Trees' output values: [0.1507567,0.1090626,0.0731837]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,1,0,0,1,1,1,1,0]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.cs index b6624560a3..085db4eeb7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.cs @@ -13,20 +13,24 @@ public static class FastTreeTweedieFeaturizationWithOptions // Microsoft.ML.FastTree. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); @@ -37,7 +41,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new FastTreeTweedieTrainer.Options { // Only use 80% of features to reduce over-fitting. @@ -63,44 +68,60 @@ public static void Example() }; // Define the featurizer. 
- var pipeline = mlContext.Transforms.FeaturizeByFastTreeTweedie(options); + var pipeline = mlContext.Transforms.FeaturizeByFastTreeTweedie( + options); // Train the model. var model = pipeline.Fit(dataView); - // Create testing data. Use different random seed to make it different from training data. + // Create testing data. Use different random seed to make it different + // from training data. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + 
.Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector [1.543569,1.494266,1.284405] is transformed to three different tree-based feature vectors: + // The original feature vector [1.543569,1.494266,1.284405] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.05652997,-0.02312196,-0.01179363]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,0,0,0]. - // The original feature vector [0.764918,1.11206,0.648211] is transformed to three different tree-based feature vectors: + // The original feature vector [0.764918,1.11206,0.648211] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.1933938,-0.1042738,-0.2312837]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,0,0,0,0]. - // The original feature vector [1.251254,1.269456,1.444864] is transformed to three different tree-based feature vectors: + // The original feature vector [1.251254,1.269456,1.444864] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.05652997,-0.06082304,-0.04528879]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,1,0,1]. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -110,12 +131,14 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. 
- Features = Enumerable.Repeat(label, 3).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 3).Select(x => x + + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public float Label { get; set; } @@ -130,7 +153,8 @@ private class TransformedDataPoint : DataPoint public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.tt index a075887d1e..b1ea23e2b2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/FastTreeTweedieFeaturizationWithOptions.tt @@ -39,15 +39,18 @@ string ExpectedOutputPerInstance= @"// Expected output: // Label: 0.096, Prediction: 0.140"; string ExpectedOutput = @"// Expected output: - // The original feature vector [1.543569,1.494266,1.284405] is transformed to three different tree-based feature vectors: + // The original feature vector [1.543569,1.494266,1.284405] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.05652997,-0.02312196,-0.01179363]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,0,0,0]. 
- // The original feature vector [0.764918,1.11206,0.648211] is transformed to three different tree-based feature vectors: + // The original feature vector [0.764918,1.11206,0.648211] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.1933938,-0.1042738,-0.2312837]. // Leave IDs' 0-1 representation: [0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1,0,0,1,1,0,0,0,1,0,0,0,0]. - // The original feature vector [1.251254,1.269456,1.444864] is transformed to three different tree-based feature vectors: + // The original feature vector [1.251254,1.269456,1.444864] is + // transformed to three different tree-based feature vectors: // Trees' output values: [-0.05652997,-0.06082304,-0.04528879]. // Leave IDs' 0-1 representation: [0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0]. // Paths IDs' 0-1 representation: [1,0,0,0,0,1,1,0,1,0,1,1,1,0,1]."; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/PretrainedTreeEnsembleFeaturizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/PretrainedTreeEnsembleFeaturizationWithOptions.cs index f96e595a63..b60c74672d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/PretrainedTreeEnsembleFeaturizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/PretrainedTreeEnsembleFeaturizationWithOptions.cs @@ -13,15 +13,17 @@ public static void Example() { // Create data set int dataPointCount = 200; - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(dataPointCount).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); // Define input and output columns of tree-based featurizer. @@ -31,7 +33,8 @@ public static void Example() string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define a tree model whose trees will be extracted to construct a tree featurizer. + // Define a tree model whose trees will be extracted to construct a tree + // featurizer. var trainer = mlContext.BinaryClassification.Trainers.FastTree( new FastTreeBinaryTrainer.Options { @@ -57,36 +60,51 @@ public static void Example() PathsColumnName = pathsColumnName }; - // Fit the created featurizer. It doesn't perform actual training because a pretrained model is provided. - var treeFeaturizer = mlContext.Transforms.FeaturizeByPretrainTreeEnsemble(options).Fit(dataView); + // Fit the created featurizer. It doesn't perform actual training + // because a pretrained model is provided. + var treeFeaturizer = mlContext.Transforms + .FeaturizeByPretrainTreeEnsemble(options).Fit(dataView); // Apply TreeEnsembleFeaturizer to the input data. var transformed = treeFeaturizer.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. 
+ var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join( + ",", dataPoint.Features) + "] is transformed to three " + + "different tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } // Expected output: - // The original feature vector[0.8173254, 0.7680227, 0.5581612] is transformed to three different tree - based feature vectors: + // The original feature vector[0.8173254, 0.7680227, 0.5581612] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.4172185]. // Leave IDs' 0-1 representation: [1,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1]. - // The original feature vector[0.7588848, 1.106027, 0.6421779] is transformed to three different tree - based feature vectors: + // The original feature vector[0.7588848, 1.106027, 0.6421779] is + // transformed to three different tree - based feature vectors: // Trees' output values: [-1]. 
// Leave IDs' 0-1 representation: [0,0,1,0]. // Paths IDs' 0-1 representation: [1,1,0]. - // The original feature vector[0.2737045, 0.2919063, 0.4673147] is transformed to three different tree - based feature vectors: + // The original feature vector[0.2737045, 0.2919063, 0.4673147] is + // transformed to three different tree - based feature vectors: // Trees' output values: [0.4172185]. // Leave IDs' 0-1 representation: [1,0,0,0]. // Paths IDs' 0-1 representation: [1,1,1]. @@ -104,12 +122,14 @@ public static void Example() // / Leaf -4 // Leaf -1 // - // Thus, if a data point reaches Leaf indexed by -1, its 0-1 path representation may be [1,1,1] because that data point + // Thus, if a data point reaches Leaf indexed by -1, its 0-1 path + // representation may be [1,1,1] because that data point // went through all Node 0, Node 1, and Node 2. } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -120,13 +140,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 3).Select(x => x ? randomFloat() : randomFloat() + 0.2f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => x ? + randomFloat() : randomFloat() + 0.2f).ToArray() }; } } - // Example with label and 3 feature values. A data set is a collection of such examples. + // Example with label and 3 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -137,11 +160,12 @@ private class DataPoint // Class used to capture the output of tree-base featurization. private class TransformedDataPoint : DataPoint { - // The i-th value is the output value of the i-th decision tree. + // The i-th value is the output value of the i-th decision tree. public float[] Trees { get; set; } - // The 0-1 encoding of leaves the input feature vector falls into. + // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RankingFeaturization.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RankingFeaturization.ttinclude index 16d6858c91..a10b9d8b96 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RankingFeaturization.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RankingFeaturization.ttinclude @@ -13,21 +13,25 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); <# } #> @@ -39,14 +43,16 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. var trainerOptions = new <#=TrainerOptions#>; // Define the tree-based featurizer's configuration. var options = new <#=Options#>; // Define the featurizer. - var pipeline = mlContext.Transforms.<#=Trainer#>(options); + var pipeline = mlContext.Transforms.<#=Trainer#>( + options); // Train the model. var model = pipeline.Fit(dataView); @@ -54,24 +60,35 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization // Apply the trained transformer to the considered data set. 
var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } <#=ExpectedOutput#> } - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0, int groupSize = 10) { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -83,13 +100,16 @@ namespace 
Samples.Dynamic.Transforms.TreeFeaturization Label = (uint)label, GroupId = (uint)(i / groupSize), // Create random features that are correlated with the label. - // For data points with larger labels, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 3).Select(x => randomFloat() + x * 0.1f).ToArray() + // For data points with larger labels, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 3).Select(x => randomFloat() + + x * 0.1f).ToArray() }; } } - // Example with label, groupId, and 3 feature values. A data set is a collection of such examples. + // Example with label, groupId, and 3 feature values. A data set is a + // collection of such examples. private class DataPoint { [KeyType(5)] @@ -107,7 +127,8 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RegressionFeaturization.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RegressionFeaturization.ttinclude index 28ee91ffaf..cb684b8387 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RegressionFeaturization.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization/RegressionFeaturization.ttinclude @@ -16,21 +16,25 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization <# } #> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. 
- // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(100).ToList(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. dataView = mlContext.Data.Cache(dataView); <# } #> @@ -42,39 +46,53 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization string leavesColumnName = nameof(TransformedDataPoint.Leaves); string pathsColumnName = nameof(TransformedDataPoint.Paths); - // Define the configuration of the trainer used to train a tree-based model. + // Define the configuration of the trainer used to train a tree-based + // model. 
var trainerOptions = new <#=TrainerOptions#>; // Define the tree-based featurizer's configuration. var options = new <#=Options#>; // Define the featurizer. - var pipeline = mlContext.Transforms.<#=Trainer#>(options); + var pipeline = mlContext.Transforms.<#=Trainer#>( + options); // Train the model. var model = pipeline.Fit(dataView); - // Create testing data. Use different random seed to make it different from training data. + // Create testing data. Use different random seed to make it different + // from training data. var transformed = model.Transform(dataView); - // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformed, false).ToList(); + // Convert IDataView object to a list. Each element in the resulted list + // corresponds to a row in the IDataView. + var transformedDataPoints = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformed, false).ToList(); // Print out the transformation of the first 3 data points. 
for (int i = 0; i < 3; ++i) { var dataPoint = dataPoints[i]; var transformedDataPoint = transformedDataPoints[i]; - Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:"); - Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}]."); - Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}]."); - Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}]."); + Console.WriteLine("The original feature vector [" + String.Join(",", + dataPoint.Features) + "] is transformed to three different " + + "tree-based feature vectors:"); + + Console.WriteLine(" Trees' output values: [" + String.Join(",", + transformedDataPoint.Trees) + "]."); + + Console.WriteLine(" Leave IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Leaves) + "]."); + + Console.WriteLine(" Paths IDs' 0-1 representation: [" + String + .Join(",", transformedDataPoint.Paths) + "]."); } <#=ExpectedOutput#> } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); for (int i = 0; i < count; i++) @@ -84,12 +102,14 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization { Label = label, // Create random features that are correlated with the label. - Features = Enumerable.Repeat(label, 3).Select(x => x + (float)random.NextDouble()).ToArray() + Features = Enumerable.Repeat(label, 3).Select(x => x + + (float)random.NextDouble()).ToArray() }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public float Label { get; set; } @@ -104,7 +124,8 @@ namespace Samples.Dynamic.Transforms.TreeFeaturization public float[] Trees { get; set; } // The 0-1 encoding of leaves the input feature vector falls into. public float[] Leaves { get; set; } - // The 0-1 encoding of paths the input feature vector reaches the leaves. + // The 0-1 encoding of paths the input feature vector reaches the + // leaves. public float[] Paths { get; set; } } }