diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.cs
index aa0a99e245..9952909c64 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.cs
@@ -8,19 +8,22 @@ namespace Samples.Dynamic.Trainers.Ranking
 {
     public static class FastTree
     {
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.FastTree.
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.FastTree at
+        // https://www.nuget.org/packages/Microsoft.ML.FastTree/
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. Setting the seed to a fixed number
+            // in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
             // Create a list of training data points.
             var dataPoints = GenerateRandomDataPoints(1000);
 
-            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            // Convert the list of data points to an IDataView object, which is
+            // consumable by ML.NET API.
             var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
 
             // Define the trainer.
@@ -29,17 +32,21 @@ public static void Example()
             // Train the model.
             var model = pipeline.Fit(trainingData);
 
-            // Create testing data. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Create testing data. Use different random seed to make it different
+            // from training data.
+            var testData = mlContext.Data.LoadFromEnumerable(
+                GenerateRandomDataPoints(500, seed:123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
 
             // Take the top 5 rows.
-            var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
+            var topTransformedTestData = mlContext.Data.TakeRows(
+                transformedTestData, 5);
 
             // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
+            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
+                topTransformedTestData, reuseRowObject: false).ToList();
 
             // Print 5 predictions.
             foreach (var p in predictions)
@@ -61,7 +68,8 @@ public static void Example()
             // NDCG: @1:0.99, @2:0.98, @3:0.99
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed = 0, int groupSize = 10)
        {
             var random = new Random(seed);
             float randomFloat() => (float)random.NextDouble();
@@ -73,13 +81,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
                     Label = (uint)label,
                     GroupId = (uint)(i / groupSize),
                     // Create random features that are correlated with the label.
-                    // For data points with larger labels, the feature values are slightly increased by adding a constant.
-                    Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
+                    // For data points with larger labels, the feature values are
+                    // slightly increased by adding a constant.
+                    Features = Enumerable.Repeat(label, 50).Select(
+                        x => randomFloat() + x * 0.1f).ToArray()
                 };
             }
         }
 
-        // Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
+        // Example with label, groupId, and 50 feature values. A data set is a
+        // collection of such examples.
         private class DataPoint
         {
             [KeyType(5)]
@@ -102,8 +113,13 @@ private class Prediction
         // Pretty-print RankerMetrics objects.
         public static void PrintMetrics(RankingMetrics metrics)
         {
-            Console.WriteLine($"DCG: {string.Join(", ", metrics.DiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
-            Console.WriteLine($"NDCG: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
+            Console.WriteLine("DCG: " + string.Join(", ",
+                metrics.DiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
+
+            Console.WriteLine("NDCG: " + string.Join(", ",
+                metrics.NormalizedDiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
         }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.tt
index ac6ee1e854..dfe2abd966 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTree.tt
@@ -6,8 +6,9 @@ string TrainerOptions = null;
 
 string OptionsInclude = "";
 string Comments= @"
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.FastTree.";
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.FastTree at
+        // https://www.nuget.org/packages/Microsoft.ML.FastTree/";
 
 string ExpectedOutputPerInstance = @"// Expected output:
 // Label: 5, Score: 13.0154
@@ -19,4 +20,4 @@ string ExpectedOutputPerInstance = @"// Expected output:
 string ExpectedOutput = @"// Expected output:
 // DCG: @1:41.95, @2:63.33, @3:75.65
 // NDCG: @1:0.99, @2:0.98, @3:0.99";
-#>
\ No newline at end of file
+#>
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.cs
index 5683153940..5d575ec160 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.cs
@@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Ranking
 {
     public static class FastTreeWithOptions
     {
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.FastTree.
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.FastTree at
+        // https://www.nuget.org/packages/Microsoft.ML.FastTree/
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. Setting the seed to a fixed number
+            // in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
             // Create a list of training data points.
             var dataPoints = GenerateRandomDataPoints(1000);
 
-            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            // Convert the list of data points to an IDataView object, which is
+            // consumable by ML.NET API.
             var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
 
             // Define trainer options.
@@ -43,17 +46,21 @@ public static void Example()
             // Train the model.
             var model = pipeline.Fit(trainingData);
 
-            // Create testing data. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Create testing data. Use different random seed to make it different
+            // from training data.
+            var testData = mlContext.Data.LoadFromEnumerable(
+                GenerateRandomDataPoints(500, seed:123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
 
             // Take the top 5 rows.
-            var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
+            var topTransformedTestData = mlContext.Data.TakeRows(
+                transformedTestData, 5);
 
             // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
+            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
+                topTransformedTestData, reuseRowObject: false).ToList();
 
             // Print 5 predictions.
             foreach (var p in predictions)
@@ -75,7 +82,8 @@ public static void Example()
             // NDCG: @1:0.96, @2:0.95, @3:0.97
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed = 0, int groupSize = 10)
         {
             var random = new Random(seed);
             float randomFloat() => (float)random.NextDouble();
@@ -87,13 +95,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
                     Label = (uint)label,
                     GroupId = (uint)(i / groupSize),
                     // Create random features that are correlated with the label.
-                    // For data points with larger labels, the feature values are slightly increased by adding a constant.
-                    Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
+                    // For data points with larger labels, the feature values are
+                    // slightly increased by adding a constant.
+                    Features = Enumerable.Repeat(label, 50).Select(
+                        x => randomFloat() + x * 0.1f).ToArray()
                 };
             }
         }
 
-        // Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
+        // Example with label, groupId, and 50 feature values. A data set is a
+        // collection of such examples.
         private class DataPoint
         {
             [KeyType(5)]
@@ -116,8 +127,12 @@ private class Prediction
         // Pretty-print RankerMetrics objects.
         public static void PrintMetrics(RankingMetrics metrics)
         {
-            Console.WriteLine($"DCG: {string.Join(", ", metrics.DiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
-            Console.WriteLine($"NDCG: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
+            Console.WriteLine("DCG: " + string.Join(", ",
+                metrics.DiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
+            Console.WriteLine("NDCG: " + string.Join(", ",
+                metrics.NormalizedDiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
         }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.tt
index b4a65602aa..ce1140edf6 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/FastTreeWithOptions.tt
@@ -16,8 +16,9 @@ string TrainerOptions = @"FastTreeRankingTrainer.Options
 
 string OptionsInclude = "using Microsoft.ML.Trainers.FastTree;";
 string Comments= @"
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.FastTree.";
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.FastTree at
+        // https://www.nuget.org/packages/Microsoft.ML.FastTree/";
 
 string ExpectedOutputPerInstance = @"// Expected output:
 // Label: 5, Score: 8.807633
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
index 3b6e542ef7..d6b3945d2b 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
@@ -8,17 +8,22 @@ namespace Samples.Dynamic.Trainers.Ranking
 {
     public static class LightGbm
     {
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.LightGbm at
+        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. Setting the seed to a fixed number
+            // in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
             // Create a list of training data points.
             var dataPoints = GenerateRandomDataPoints(1000);
 
-            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            // Convert the list of data points to an IDataView object, which is
+            // consumable by ML.NET API.
             var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
 
             // Define the trainer.
@@ -27,39 +32,44 @@ public static void Example()
             // Train the model.
             var model = pipeline.Fit(trainingData);
 
-            // Create testing data. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Create testing data. Use different random seed to make it different
+            // from training data.
+            var testData = mlContext.Data.LoadFromEnumerable(
+                GenerateRandomDataPoints(500, seed:123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
 
             // Take the top 5 rows.
-            var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
+            var topTransformedTestData = mlContext.Data.TakeRows(
+                transformedTestData, 5);
 
             // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
+            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
+                topTransformedTestData, reuseRowObject: false).ToList();
 
             // Print 5 predictions.
             foreach (var p in predictions)
                 Console.WriteLine($"Label: {p.Label}, Score: {p.Score}");
 
             // Expected output:
-            // Label: 5, Score: 2.195333
-            // Label: 4, Score: 0.2596574
-            // Label: 4, Score: -2.168355
-            // Label: 1, Score: -3.074823
-            // Label: 1, Score: -1.523607
+            // Label: 5, Score: 2.493263
+            // Label: 1, Score: -4.528436
+            // Label: 3, Score: -3.002865
+            // Label: 3, Score: -2.151812
+            // Label: 1, Score: -4.089102
 
             // Evaluate the overall metrics.
             var metrics = mlContext.Ranking.Evaluate(transformedTestData);
             PrintMetrics(metrics);
 
             // Expected output:
-            // DCG: @1:26.03, @2:37.57, @3:45.83
-            // NDCG: @1:0.61, @2:0.57, @3:0.59
+            // DCG: @1:41.95, @2:63.76, @3:75.97
+            // NDCG: @1:0.99, @2:0.99, @3:0.99
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed = 0, int groupSize = 10)
         {
             var random = new Random(seed);
             float randomFloat() => (float)random.NextDouble();
@@ -71,13 +81,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
                     Label = (uint)label,
                     GroupId = (uint)(i / groupSize),
                     // Create random features that are correlated with the label.
-                    // For data points with larger labels, the feature values are slightly increased by adding a constant.
-                    Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
+                    // For data points with larger labels, the feature values are
+                    // slightly increased by adding a constant.
+                    Features = Enumerable.Repeat(label, 50).Select(
+                        x => randomFloat() + x * 0.1f).ToArray()
                 };
             }
         }
 
-        // Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
+        // Example with label, groupId, and 50 feature values. A data set is a
+        // collection of such examples.
         private class DataPoint
         {
             [KeyType(5)]
@@ -100,8 +113,12 @@ private class Prediction
         // Pretty-print RankerMetrics objects.
         public static void PrintMetrics(RankingMetrics metrics)
         {
-            Console.WriteLine($"DCG: {string.Join(", ", metrics.DiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
-            Console.WriteLine($"NDCG: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
+            Console.WriteLine("DCG: " + string.Join(", ",
+                metrics.DiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
+            Console.WriteLine("NDCG: " + string.Join(", ",
+                metrics.NormalizedDiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
         }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.tt
index 136cad7843..f8c628c26d 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.tt
@@ -6,8 +6,9 @@ string TrainerOptions = null;
 
 string OptionsInclude = "";
 string Comments= @"
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.LightGbm.";
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.LightGbm at
+        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/";
 
 string ExpectedOutputPerInstance = @"// Expected output:
 // Label: 5, Score: 2.493263
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
index 1c21c7704a..4859cd7b81 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
@@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.Ranking
 {
     public static class LightGbmWithOptions
     {
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.LightGbm.
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.LightGbm at
+        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. Setting the seed to a fixed number
+            // in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
             // Create a list of training data points.
             var dataPoints = GenerateRandomDataPoints(1000);
 
-            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            // Convert the list of data points to an IDataView object, which is
+            // consumable by ML.NET API.
             var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
 
             // Define trainer options.
@@ -44,17 +47,21 @@ public static void Example()
             // Train the model.
             var model = pipeline.Fit(trainingData);
 
-            // Create testing data. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Create testing data. Use different random seed to make it different
+            // from training data.
+            var testData = mlContext.Data.LoadFromEnumerable(
+                GenerateRandomDataPoints(500, seed:123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
 
             // Take the top 5 rows.
-            var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
+            var topTransformedTestData = mlContext.Data.TakeRows(
+                transformedTestData, 5);
 
             // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
+            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
+                topTransformedTestData, reuseRowObject: false).ToList();
 
             // Print 5 predictions.
             foreach (var p in predictions)
@@ -76,7 +83,8 @@ public static void Example()
             // NDCG: @1:0.69, @2:0.72, @3:0.74
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed = 0, int groupSize = 10)
         {
             var random = new Random(seed);
             float randomFloat() => (float)random.NextDouble();
@@ -88,13 +96,16 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
                     Label = (uint)label,
                     GroupId = (uint)(i / groupSize),
                     // Create random features that are correlated with the label.
-                    // For data points with larger labels, the feature values are slightly increased by adding a constant.
-                    Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
+                    // For data points with larger labels, the feature values are
+                    // slightly increased by adding a constant.
+                    Features = Enumerable.Repeat(label, 50).Select(
+                        x => randomFloat() + x * 0.1f).ToArray()
                 };
             }
         }
 
-        // Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
+        // Example with label, groupId, and 50 feature values. A data set is a
+        // collection of such examples.
         private class DataPoint
         {
             [KeyType(5)]
@@ -117,8 +128,12 @@ private class Prediction
         // Pretty-print RankerMetrics objects.
         public static void PrintMetrics(RankingMetrics metrics)
         {
-            Console.WriteLine($"DCG: {string.Join(", ", metrics.DiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
-            Console.WriteLine($"NDCG: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
+            Console.WriteLine("DCG: " + string.Join(", ",
+                metrics.DiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
+            Console.WriteLine("NDCG: " + string.Join(", ",
+                metrics.NormalizedDiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
         }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.tt
index 0de6d61d6d..939260bd94 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.tt
@@ -17,8 +17,9 @@ string TrainerOptions = @"LightGbmRankingTrainer.Options
 
 string OptionsInclude = "using Microsoft.ML.Trainers.LightGbm;";
 string Comments= @"
-        // This example requires installation of additional NuGet package
-        // Microsoft.ML.LightGbm.";
+        // This example requires installation of additional NuGet package for
+        // Microsoft.ML.LightGbm at
+        // https://www.nuget.org/packages/Microsoft.ML.LightGbm/";
 
 string ExpectedOutputPerInstance = @"// Expected output:
 // Label: 5, Score: 0.05836755
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs
index 41928a70ee..04b82151ac 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/PermutationFeatureImportance.cs
@@ -9,8 +9,9 @@ public static class PermutationFeatureImportance
     {
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness.
             var mlContext = new MLContext(seed:1);
 
             // Create sample data.
@@ -19,12 +20,15 @@ public static void Example()
             // Load the sample data as an IDataView.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Define a training pipeline that concatenates features into a vector, normalizes them, and then
-            // trains a linear model.
-            var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
-            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
+            // Define a training pipeline that concatenates features into a vector,
+            // normalizes them, and then trains a linear model.
+            var featureColumns = new string[] { nameof(Data.Feature1), nameof(
+                Data.Feature2) };
+            var pipeline = mlContext.Transforms.Concatenate("Features",
+                featureColumns)
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
-                .Append(mlContext.Transforms.Conversion.MapValueToKey("GroupId"))
+                .Append(mlContext.Transforms.Conversion.MapValueToKey(
+                    "GroupId"))
                 .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                 .Append(mlContext.Ranking.Trainers.FastTree());
 
@@ -37,18 +41,24 @@ public static void Example()
             // Extract the predictor.
             var linearPredictor = model.LastTransformer;
 
-            // Compute the permutation metrics for the linear model using the normalized data.
+            // Compute the permutation metrics for the linear model using the
+            // normalized data.
             var permutationMetrics = mlContext.Ranking.PermutationFeatureImportance(
                 linearPredictor, transformedData, permutationCount: 30);
 
-            // Now let's look at which features are most important to the model overall.
-            // Get the feature indices sorted by their impact on NDCG@1.
-            var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.NormalizedDiscountedCumulativeGains})
-                .OrderByDescending(feature => Math.Abs(feature.NormalizedDiscountedCumulativeGains[0].Mean))
+            // Now let's look at which features are most important to the model
+            // overall. Get the feature indices sorted by their impact on NDCG@1.
+            var sortedIndices = permutationMetrics.Select((metrics, index) => new {
+                index, metrics.NormalizedDiscountedCumulativeGains})
+                .OrderByDescending(feature => Math.Abs(
+                feature.NormalizedDiscountedCumulativeGains[0].Mean))
+
                 .Select(feature => feature.index);
 
-            Console.WriteLine("Feature\tChange in NDCG@1\t95% Confidence in the Mean Change in NDCG@1");
-            var ndcg = permutationMetrics.Select(x => x.NormalizedDiscountedCumulativeGains).ToArray();
+            Console.WriteLine("Feature\tChange in NDCG@1\t95% Confidence in the " +
+                "Mean Change in NDCG@1");
+            var ndcg = permutationMetrics.Select(
+                x => x.NormalizedDiscountedCumulativeGains).ToArray();
             foreach (int i in sortedIndices)
             {
                 Console.WriteLine("{0}\t{1:G4}\t{2:G4}",
@@ -78,14 +88,25 @@ private class Data
         /// Generate an enumerable of Data objects, creating the label as a simple
         /// linear combination of the features.
         /// </summary>
+        ///
         /// <param name="nExamples">The number of examples.</param>
-        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
-        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
-        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
-        /// <param name="seed">The seed for generating feature values and label noise.</param>
+        ///
+        /// <param name="bias">The bias, or offset, in the calculation of the label.
+        /// </param>
+        ///
+        /// <param name="weight1">The weight to multiply the first feature with to
+        /// compute the label.</param>
+        ///
+        /// <param name="weight2">The weight to multiply the second feature with to
+        /// compute the label.</param>
+        ///
+        /// <param name="seed">The seed for generating feature values and label
+        /// noise.</param>
+        ///
        /// <returns>An enumerable of Data objects.</returns>
         private static IEnumerable<Data> GenerateData(int nExamples = 10000,
-            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1, int groupSize = 5)
+            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1,
+            int groupSize = 5)
         {
             var rng = new Random(seed);
             var max = bias + 4.5 * weight1 + 4.5 * weight2 + 0.5;
@@ -99,7 +120,8 @@ private static IEnumerable<Data> GenerateData(int nExamples = 10000,
                 };
 
                 // Create a noisy label.
-                var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
+                var value = (float)(bias + weight1 * data.Feature1 + weight2 *
+                    data.Feature2 + rng.NextDouble() - 0.5);
                 if (value < max / 3)
                     data.Label = 0;
                 else if (value < 2 * max / 3)
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/Ranking.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/Ranking.ttinclude
index cc90a58187..05ed4a68b4 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/Ranking.ttinclude
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/Ranking.ttinclude
@@ -13,15 +13,17 @@ namespace Samples.Dynamic.Trainers.Ranking
 {<#=Comments#>
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. Setting the seed to a fixed number
+            // in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
             // Create a list of training data points.
             var dataPoints = GenerateRandomDataPoints(1000);
 
-            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            // Convert the list of data points to an IDataView object, which is
+            // consumable by ML.NET API.
             var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
 
 <# if (TrainerOptions == null) { #>
@@ -38,17 +40,21 @@ namespace Samples.Dynamic.Trainers.Ranking
             // Train the model.
             var model = pipeline.Fit(trainingData);
 
-            // Create testing data. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Create testing data. Use different random seed to make it different
+            // from training data.
+            var testData = mlContext.Data.LoadFromEnumerable(
+                GenerateRandomDataPoints(500, seed:123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
 
             // Take the top 5 rows.
-            var topTransformedTestData = mlContext.Data.TakeRows(transformedTestData, 5);
+            var topTransformedTestData = mlContext.Data.TakeRows(
+                transformedTestData, 5);
 
             // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(topTransformedTestData, reuseRowObject: false).ToList();
+            var predictions = mlContext.Data.CreateEnumerable<Prediction>(
+                topTransformedTestData, reuseRowObject: false).ToList();
 
             // Print 5 predictions.
             foreach (var p in predictions)
@@ -63,7 +69,8 @@ namespace Samples.Dynamic.Trainers.Ranking
 <#=ExpectedOutput#>
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0, int groupSize = 10)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed = 0, int groupSize = 10)
         {
             var random = new Random(seed);
             float randomFloat() => (float)random.NextDouble();
@@ -75,13 +82,16 @@ namespace Samples.Dynamic.Trainers.Ranking
                     Label = (uint)label,
                     GroupId = (uint)(i / groupSize),
                     // Create random features that are correlated with the label.
-                    // For data points with larger labels, the feature values are slightly increased by adding a constant.
-                    Features = Enumerable.Repeat(label, 50).Select(x => randomFloat() + x * 0.1f).ToArray()
+                    // For data points with larger labels, the feature values are
+                    // slightly increased by adding a constant.
+                    Features = Enumerable.Repeat(label, 50).Select(
+                        x => randomFloat() + x * 0.1f).ToArray()
                 };
             }
         }
 
-        // Example with label, groupId, and 50 feature values. A data set is a collection of such examples.
+        // Example with label, groupId, and 50 feature values. A data set is a
+        // collection of such examples.
         private class DataPoint
         {
             [KeyType(5)]
@@ -104,8 +114,12 @@ namespace Samples.Dynamic.Trainers.Ranking
         // Pretty-print RankerMetrics objects.
         public static void PrintMetrics(RankingMetrics metrics)
         {
-            Console.WriteLine($"DCG: {string.Join(", ", metrics.DiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
-            Console.WriteLine($"NDCG: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains.Select((d, i) => $"@{i + 1}:{d:F2}").ToArray())}");
+            Console.WriteLine("DCG: " + string.Join(", ",
+                metrics.DiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
+            Console.WriteLine("NDCG: " + string.Join(", ",
+                metrics.NormalizedDiscountedCumulativeGains.Select(
+                    (d, i) => $"@{i + 1}:{d:F2}").ToArray()));
         }
     }
-}
\ No newline at end of file
+}
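The PrintMetrics helper shared by the samples above only formats the DCG/NDCG arrays as "@k:value" pairs. A minimal, self-contained C# sketch of that formatting is shown below; it uses a hard-coded gains array copied from the FastTree sample's expected output instead of a real RankingMetrics object (those only come out of mlContext.Ranking.Evaluate), and the class and variable names are illustrative only.

using System;
using System.Linq;

// Stand-alone sketch; not part of the samples changed above.
public static class PrintMetricsSketch
{
    public static void Main()
    {
        // Stand-in for metrics.DiscountedCumulativeGains, values taken from
        // the FastTree sample's expected output.
        var dcg = new[] { 41.95, 63.33, 75.65 };

        // Same formatting as PrintMetrics; prints:
        // DCG: @1:41.95, @2:63.33, @3:75.65
        Console.WriteLine("DCG: " + string.Join(", ",
            dcg.Select((d, i) => $"@{i + 1}:{d:F2}")));
    }
}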