dotnet · sierralee51 · Jun 28, 2019 · Jun 26, 2019 · Jun 27, 2019 · Jun 27, 2019
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs
@@ -7,8 +7,9 @@ public static class BootstrapSample
     {
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
-            // as a catalog of available operations and as the source of randomness.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations 
+            // and as the source of randomness.
             var mlContext = new MLContext();
 
             // Get a small dataset as an IEnumerable.
@@ -23,20 +24,27 @@ public static void Example()
 
             var data = mlContext.Data.LoadFromEnumerable(rawData);
 
-            // Now take a bootstrap sample of this dataset to create a new dataset. The bootstrap is a resampling technique that
-            // creates a training set of the same size by picking with replacement from the original dataset. With the bootstrap, 
-            // we expect that the resampled dataset will have about 63% of the rows of the original dataset (i.e. 1-e^-1), with some
-            // rows represented more than once.
-            // BootstrapSample is a streaming implementation of the boostrap that enables sampling from a dataset too large to hold in memory.
-            // To enable streaming, BootstrapSample approximates the bootstrap by sampling each row according to a Poisson(1) distribution.
-            // Note that this streaming approximation treats each row independently, thus the resampled dataset is not guaranteed to be the 
-            // same length as the input dataset.
-            // Let's take a look at the behavior of the BootstrapSample by examining a few draws:
+            // Now take a bootstrap sample of this dataset to create a new dataset. 
+            // The bootstrap is a resampling technique that creates a training set
+            // of the same size by picking with replacement from the original
+            // dataset. With the bootstrap, we expect that the resampled dataset
+            // will have about 63% of the rows of the original dataset
+            // (i.e. 1-e^-1), with some rows represented more than once.
+            // BootstrapSample is a streaming implementation of the bootstrap that
+            // enables sampling from a dataset too large to hold in memory. To
+            // enable streaming, BootstrapSample approximates the bootstrap by 
+            // sampling each row according to a Poisson(1) distribution. Note that
+            // this streaming approximation treats each row independently, thus the
+            // resampled dataset is not guaranteed to be the same length as the 
+            // input dataset. Let's take a look at the behavior of the
+            // BootstrapSample by examining a few draws:
             for (int i = 0; i < 3; i++)
             {
                 var resample = mlContext.Data.BootstrapSample(data, seed: i);
 
-                var enumerable = mlContext.Data.CreateEnumerable<DataPoint>(resample, reuseRowObject: false);
+                var enumerable = mlContext.Data
+                    .CreateEnumerable<DataPoint>(resample, reuseRowObject: false);
+
                 Console.WriteLine($"Label\tFeature");
                 foreach (var row in enumerable)
                 {

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs
@@ -8,56 +8,78 @@ public static class Cache
     {
         public static void Example()
         {
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
-            // as a catalog of available operations and as the source of randomness.
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness.
             var mlContext = new MLContext();
 
             var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);
 
             // Time how long it takes to page through the records if we don't cache.
-            (int lines, double columnAverage, double elapsedSeconds) = TimeToScanIDataView(mlContext, data);
-            Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+            (int lines, double columnAverage, double elapsedSeconds) =
+                TimeToScanIDataView(mlContext, data);
+
+            Console.WriteLine(
+                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
+                $"{elapsedSeconds} seconds.");
             // Expected output (time is approximate):
             // Lines=506, averageOfColumn0=564.17 and took 0.314 seconds.
 
             // Now create a cached view of the data.
             var cachedData = mlContext.Data.Cache(data);
 
-            // Time how long it takes to page through the records the first time they're accessed after a cache is applied.
-            // This iteration will be longer than subsequent calls, as the dataset is being accessed and stored for later.
-            // Note that this operation may be relatively quick, as the system may have cached the file.
-            (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext, cachedData);
-            Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+            // Time how long it takes to page through the records the first time
+            // they're accessed after a cache is applied. This iteration will be
+            // longer than subsequent calls, as the dataset is being accessed and
+            // stored for later. Note that this operation may be relatively quick,
+            // as the system may have cached the file.
+            (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext,
+                cachedData);
+
+            Console.WriteLine(
+                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
+                $"{elapsedSeconds} seconds.");
             // Expected output (time is approximate):
             // Lines=506, averageOfColumn0=564.17 and took 0.056 seconds.
 
-            // Time how long it takes to page through the records now that the data is cached. After the first iteration that caches the IDataView,
-            // future iterations, like this one, are faster because they are pulling from data cached in memory.
-            (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext, cachedData);
-            Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+            // Time how long it takes to page through the records now that the data
+            // is cached. After the first iteration that caches the IDataView,
+            // future iterations, like this one, are faster because they are pulling
+            // from data cached in memory.
+            (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext,
+                cachedData);
+
+            Console.WriteLine(
+                $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
+                $"{elapsedSeconds} seconds.");
             // Expected output (time is approximate):
             // Lines=506, averageOfColumn0=564.17 and took 0.006 seconds.
         }
 
-        private static (int lines, double columnAverage, double elapsedSeconds) TimeToScanIDataView(MLContext mlContext, IDataView data)
+        private static (int lines, double columnAverage, double elapsedSeconds)
+            TimeToScanIDataView(MLContext mlContext, IDataView data)
         {
             int lines = 0;
             double columnAverage = 0.0;
-            var enumerable = mlContext.Data.CreateEnumerable<HousingRegression>(data, reuseRowObject: true);
+            var enumerable = mlContext.Data
+                .CreateEnumerable<HousingRegression>(data, reuseRowObject: true);
+
             var watch = System.Diagnostics.Stopwatch.StartNew();
             foreach (var row in enumerable)
             {
                 lines++;
-                columnAverage += row.MedianHomeValue + row.CrimesPerCapita + row.PercentResidental + row.PercentNonRetail + row.CharlesRiver 
-                    + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s + row.EmploymentDistance 
-                    + row.HighwayDistance + row.TaxRate + row.TeacherRatio;
+                columnAverage += row.MedianHomeValue + row.CrimesPerCapita +
+                    row.PercentResidental + row.PercentNonRetail +
+                    row.CharlesRiver + row.NitricOxides + row.RoomsPerDwelling +
+                    row.PercentPre40s + row.EmploymentDistance +
+                    row.HighwayDistance + row.TaxRate + row.TeacherRatio;
             }
             watch.Stop();
             columnAverage /= lines;
             var elapsed = watch.Elapsed;
 
-            return (lines, columnAverage, elapsed.Seconds);
-        }       
+            return (lines, columnAverage, elapsed.TotalSeconds);
+        }
 
         /// <summary>
         /// A class to hold the raw housing regression rows.

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs
@@ -17,16 +17,28 @@ public static void Example()
             // Generate some data points.
             var examples = GenerateRandomDataPoints(10);
 
-            // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
+            // Convert the examples list to an IDataView object, which is consumable
+            // by ML.NET API.
             var dataview = mlContext.Data.LoadFromEnumerable(examples);
 
-            // Cross validation splits your data randomly into set of "folds", and creates groups of Train and Test sets,
-            // where for each group, one fold is the Test and the rest of the folds the Train.
-            // So below, we specify Group column as the column containing the sampling keys.
-            // If we pass that column to cross validation it would be used to break data into certain chunks.
-            var folds = mlContext.Data.CrossValidationSplit(dataview, numberOfFolds: 3, samplingKeyColumnName: "Group");
-            var trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[0].TrainSet, reuseRowObject: false);
-            var testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[0].TestSet, reuseRowObject: false);
+            // Cross validation splits your data randomly into a set of "folds", and
+            // creates groups of Train and Test sets, where for each group, one fold
+            // is the Test and the rest of the folds the Train. So below, we specify
+            // Group column as the column containing the sampling keys. If we pass
+            // that column to cross validation it would be used to break data into
+            // certain chunks.
+            var folds = mlContext.Data
+                .CrossValidationSplit(dataview, numberOfFolds:3,
+                samplingKeyColumnName: "Group");
+
+            var trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[0].TrainSet,
+                reuseRowObject: false);
+
+            var testSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[0].TestSet,
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
 
             // The data in the Train split.
@@ -43,8 +55,14 @@ public static void Example()
             // [Group, 0], [Features, 0.9060271]
             // [Group, 0], [Features, 0.2737045]
 
-            trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[1].TrainSet, reuseRowObject: false);
-            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[1].TestSet, reuseRowObject: false);
+            trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[1].TrainSet,
+                reuseRowObject: false);
+
+            testSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[1].TestSet,
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
             // The data in the Train split.
             // [Group, 0], [Features, 0.7262433]
@@ -60,8 +78,14 @@ public static void Example()
             // [Group, 1], [Features, 0.2060332]
             // [Group, 1], [Features, 0.4421779]
 
-            trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[2].TrainSet, reuseRowObject: false);
-            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[2].TestSet, reuseRowObject: false);
+            trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[2].TrainSet,
+                reuseRowObject: false);
+
+            testSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[2].TestSet,
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
             // The data in the Train split.
             // [Group, 0], [Features, 0.7262433]
@@ -79,8 +103,14 @@ public static void Example()
 
             // Example of a split without specifying a sampling key column.
             folds = mlContext.Data.CrossValidationSplit(dataview, numberOfFolds: 3);
-            trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[0].TrainSet, reuseRowObject: false);
-            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[0].TestSet, reuseRowObject: false);
+            trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[0].TrainSet,
+                reuseRowObject: false);
+
+            testSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[0].TestSet,
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
             // The data in the Train split.
             // [Group, 0], [Features, 0.7262433]
@@ -96,8 +126,14 @@ public static void Example()
             // [Group, 2], [Features, 0.5588848]
             // [Group, 0], [Features, 0.9060271]
 
-            trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[1].TrainSet, reuseRowObject: false);
-            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[1].TestSet, reuseRowObject: false);
+            trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[1].TrainSet,
+                reuseRowObject: false);
+
+            testSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[1].TestSet,
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
             // The data in the Train split.
             // [Group, 2], [Features, 0.7680227]
@@ -113,8 +149,13 @@ public static void Example()
             // [Group, 2], [Features, 0.9775497]
             // [Group, 0], [Features, 0.2737045]
 
-            trainSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[2].TrainSet, reuseRowObject: false);
-            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[2].TestSet, reuseRowObject: false);
+            trainSet = mlContext.Data
+                .CreateEnumerable<DataPoint>(folds[2].TrainSet,
+                reuseRowObject: false);
+
+            testSet = mlContext.Data.CreateEnumerable<DataPoint>(folds[2].TestSet, 
+                reuseRowObject: false);
+
             PrintPreviewRows(trainSet, testSet);
             // The data in the Train split.
             // [Group, 0], [Features, 0.7262433]
@@ -131,7 +172,9 @@ public static void Example()
             // [Group, 1], [Features, 0.4421779]
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, 
+            int seed = 0)
+
         {
             var random = new Random(seed);
             for (int i = 0; i < count; i++)
@@ -146,7 +189,8 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
             }
         }
 
-        // Example with features and group column. A data set is a collection of such examples.
+        // Example with features and group column. A data set is a collection of
+        // such examples.
         private class DataPoint
         {
             public float Group { get; set; }
@@ -155,7 +199,9 @@ private class DataPoint
         }
 
         // print helper
-        private static void PrintPreviewRows(IEnumerable<DataPoint> trainSet, IEnumerable<DataPoint> testSet)
+        private static void PrintPreviewRows(IEnumerable<DataPoint> trainSet, 
+            IEnumerable<DataPoint> testSet)
+
         {
 
             Console.WriteLine($"The data in the Train split.");