diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 47007edaa6..22c2767d7a 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,7 +1,8 @@ +Data.CustomTextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.DataViewReference ImportData Microsoft.ML.Runtime.EntryPoints.DataViewReference+Input Microsoft.ML.Runtime.EntryPoints.DataViewReference+Output Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput -Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output +Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData TextLoader Microsoft.ML.Runtime.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Models.AnomalyDetectionEvaluator Evaluates an anomaly detection scored dataset. Microsoft.ML.Runtime.Data.Evaluate AnomalyDetection Microsoft.ML.Runtime.Data.AnomalyDetectionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.BinaryClassificationEvaluator Evaluates a binary classification scored dataset. 
Microsoft.ML.Runtime.Data.Evaluate Binary Microsoft.ML.Runtime.Data.BinaryClassifierMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+ClassificationEvaluateOutput Models.BinaryCrossValidator Cross validation for binary classification Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro CrossValidateBinary Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Output] diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index a6309fe36a..6eeb1bf709 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -1,5 +1,43 @@ { "EntryPoints": [ + { + "Name": "Data.CustomTextLoader", + "Desc": "Import a dataset from a text file", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "InputFile", + "Type": "FileHandle", + "Desc": "Location of the input file", + "Aliases": [ + "data" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "CustomSchema", + "Type": "String", + "Desc": "Custom schema to use for parsing", + "Aliases": [ + "schema" + ], + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "The resulting data view" + } + ] + }, { "Name": "Data.DataViewReference", "Desc": "Pass dataview from memory to experiment", @@ -99,16 +137,325 @@ "IsNullable": false }, { - "Name": "CustomSchema", - "Type": "String", - "Desc": "Custom schema to use for parsing", + "Name": "Arguments", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Name", + "Type": "String", + "Desc": "Name of the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "Type", + "Type": { + "Kind": "Enum", + "Values": [ + "I1", + "U1", + "I2", + "U2", + "I4", + "U4", + "I8", + "U8", + "R4", + "Num", + "R8", + "TX", + "Text", + "TXT", + "BL", + "Bool", + "TimeSpan", + "TS", + "DT", + "DateTime", + "DZ", + "DateTimeZone", + "UG", + "U16" + ] + }, + "Desc": "Type of the items in the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Source", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "Int", + "Desc": "First index in the range", + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "Int", + "Desc": "Last index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AutoEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, but should be a fixed number of items", + "Aliases": [ + "auto" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "VariableEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, which can vary from line to line", + "Aliases": [ + "var" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "AllOther", + "Type": "Bool", + "Desc": "This range includes only other indices not specified", + "Aliases": [ + "other" + 
], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "ForceVector", + "Type": "Bool", + "Desc": "Force scalar columns to be treated as vectors of length one", + "Aliases": [ + "vector" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + } + }, + "Desc": "Source index range(s) of the column", + "Aliases": [ + "src" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "KeyRange", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "UInt", + "Desc": "First index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "UInt", + "Desc": "Last index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Contiguous", + "Type": "Bool", + "Desc": "Whether the key is contiguous", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + } + ] + }, + "Desc": "For a key column, this defines the range of values", + "Aliases": [ + "key" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + } + ] + } + }, + "Desc": "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", + "Aliases": [ + "col" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "UseThreads", + "Type": "Bool", + "Desc": "Use separate parsing threads?", + "Aliases": [ + "threads" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "HeaderFile", + "Type": "String", + "Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.", + "Aliases": [ + "hf" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "MaxRows", + "Type": "Int", + "Desc": "Maximum number of rows to produce", + "Aliases": [ + "rows" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AllowQuoting", + "Type": "Bool", + "Desc": "Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by \"\". When false, consecutive separators denote an empty value.", + "Aliases": [ + "quote" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "AllowSparse", + "Type": "Bool", + "Desc": "Whether the input may include sparse representations", + "Aliases": [ + "sparse" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "InputSize", + "Type": "Int", + "Desc": "Number of source columns in the text data. 
Default is that sparse rows contain their size information.", + "Aliases": [ + "size" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Separator", + "Type": { + "Kind": "Array", + "ItemType": "Char" + }, + "Desc": "Source column separator.", + "Aliases": [ + "sep" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": [ + "\t" + ] + }, + { + "Name": "TrimWhitespace", + "Type": "Bool", + "Desc": "Remove trailing whitespace from lines", + "Aliases": [ + "trim" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "HasHeader", + "Type": "Bool", + "Desc": "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.", + "Aliases": [ + "header" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + }, + "Desc": "Arguments", "Aliases": [ - "schema" + "args" ], - "Required": false, + "Required": true, "SortOrder": 2.0, - "IsNullable": false, - "Default": null + "IsNullable": false } ], "Outputs": [ @@ -117,6 +464,9 @@ "Type": "DataView", "Desc": "The resulting data view" } + ], + "InputKind": [ + "ILearningPipelineLoader" ] }, { @@ -21959,6 +22309,10 @@ } ] }, + { + "Kind": "ILearningPipelineLoader", + "Settings": [] + }, { "Kind": "IMulticlassClassificationOutput", "Settings": [] diff --git a/build.cmd b/build.cmd index 872487cb88..c020999ade 100644 --- a/build.cmd +++ b/build.cmd @@ -1,2 +1,2 @@ -@call %~dp0run.cmd build %* +@call "%~dp0run.cmd" build %* @exit /b %ERRORLEVEL% diff --git a/docs/code/IDataViewDesignPrinciples.md b/docs/code/IDataViewDesignPrinciples.md new file mode 100644 index 0000000000..c3f345bf68 --- /dev/null +++ b/docs/code/IDataViewDesignPrinciples.md @@ -0,0 +1,471 @@ +# IDataView Design Principles + +## Overview + +### Brief Introduction to IDataView + +The *IDataView system* is a set of interfaces and components that provide +efficient, compositional processing of schematized data for machine learning +and advanced analytics applications. It is designed to gracefully and +efficiently handle high dimensional data and large data sets. It does not +directly address distributed data and computation, but is suitable for single +node processing of data partitions belonging to larger distributed data sets. + +IDataView is the data pipeline machinery for ML.NET. Microsoft teams consuming +this library have implemented libraries of IDataView related components +(loaders, transforms, savers, trainers, predictors, etc.) and have validated +the performance, scalability and task flexibility benefits. + +The name IDataView was inspired from the database world, where the term table +typically indicates a mutable body of data, while a view is the result of a +query on one or more tables or views, and is generally immutable. Note that +both tables and views are schematized, being organized into typed columns and +rows conforming to the column types. Views differ from tables in several ways: + +* Views are *composable*. New views are formed by applying transformations + (queries) to other views. In contrast, forming a new table from an existing + table involves copying data, making the tables decoupled; the new table is + not linked to the original table in any way. + +* Views are *virtual*; tables are fully realized/persisted. 
In other words, a + table contains the values in the rows while a view computes values from + other views or tables, so does not contain or own the values. + +* Views are *immutable*; tables are mutable. Since a view does not contain + values, but merely computes values from its source views, there is no + mechanism for modifying the values. + +Note that immutability and compositionality are critical enablers of +technologies that require reasoning over transformation, like query +optimization and remoting. Immutability is also key for concurrency and thread +safety. Views being virtual minimizes I/O, memory allocation, and computation. +Information is accessed, memory is allocated, and computation is performed, +only when needed to satisfy a local request for information. + +### Design Requirements + +The IDataView design fulfills the following design requirements: + +* **General schema**: Each view carries schema information, which specifies + the names and types of the view's columns, together with metadata associated + with the columns. The system is optimized for a reasonably small number of + columns (hundreds). See [here](#basics). + +* **Open type system**: The column type system is open, in the sense that new + data types can be introduced at any time and in any assembly. There is a set + of standard types (which may grow over time), but there is no registry of + all supported types. See [here](#basics). + +* **High dimensional data support**: The type system for columns includes + homogeneous vector types, so a set of related primitive values can be + grouped into a single vector-valued column. See [here](#vector-types). + +* **Compositional**: The IDataView design supports components of various + kinds, and supports composing multiple primitive components to achieve + higher-level semantics. See [here](#components). + +* **Open component system**: While the ML.NET code has a growing large library + of IDataView components, additional components that interoperate with these + may be implemented in other code bases. See [here](#components). + +* **Cursoring**: The rows of a view are accessed sequentially via a row + cursor. Multiple cursors can be active on the same view, both sequentially + and in parallel. In particular, views support multiple iterations through + the rows. Each cursor has a set of active columns, specified at cursor + construction time. Shuffling is supported via an optional random number + generator passed at cursor construction time. See [here](#cursoring). + +* **Lazy computation**: When only a subset of columns or a subset of rows is + requested, computation for other columns and rows can be, and generally is, + avoided. Certain transforms, loaders, and caching scenarios may be + speculative or eager in their computation, but the default is to perform + only computation needed for the requested columns and rows. See + [here](#lazy-computation-and-active-columns). + +* **Immutability and repeatability**: The data served by a view is immutable + and any computations performed are repeatable. In particular, multiple + cursors on the view produce the same row values in the same order (when + using the same shuffling). See [here](#immutability-and-repeatability). + +* **Memory efficiency**: The IDataView design includes cooperative buffer + sharing patterns that eliminate the need to allocate objects or buffers for + each row when cursoring through a view. See [here](#memory-efficiency). 
+ +* **Batch-parallel computation**: The IDataView system includes the ability to + get a set of cursors that can be executed in parallel, with each individual + cursor serving up a subset of the rows. Splitting into multiple cursors can + be done either at the loader level or at an arbitrary point in a pipeline. + The component that performs splitting also provides the consolidation logic. + This enables computation heavy pipelines to leverage multiple cores without + complicating each individual transform implementation. See + [here](#batch-parallel-cursoring). + +* **Large data support**: Constructing views on data files and cursoring + through the rows of a view does not require the entire data to fit in + memory. Conversely, when the entire data fits, there is nothing preventing + it from being loaded entirely in memory. See [here](#data-size). + +### Design Non-requirements + +The IDataView system design does *not* include the following: + +* **Multi-view schema information**: There is no direct support for specifying + cross-view schema information, for example, that certain columns are primary + keys, and that there are foreign key relationships among tables. However, + the column metadata support, together with conventions, may be used to + represent such information. + +* **Standard ML schema**: The IDataView system does not define, nor prescribe, + standard ML schema representation. For example, it does not dictate + representation of nor distinction between different semantic interpretations + of columns, such as label, feature, score, weight, etc. However, the column + metadata support, together with conventions, may be used to represent such + interpretations. + +* **Row count**: A view is not required to provide its row count. The + `IDataView` interface has a `GetRowCount` method with type `Nullable<long>`. + When this returns `null`, the row count is not available directly from the + view. + +* **Efficient indexed row access**: There is no standard way in the IDataView + system to request the values for a specific row number. While the + `IRowCursor` interface has a `MoveMany(long count)` method, it only supports + moving forward `(count > 0)`, and is not necessarily more efficient than + calling `MoveNext()` repeatedly. See [here](#row-cursor). + +* **Data file formats**: The IDataView system does not dictate storage or + transport formats. It *does* include interfaces for loader and saver + components. The ML.NET code has implementations of loaders and savers for + some binary and text file formats. + +* **Multi-node computation over multiple data partitions**: The IDataView + design is focused on single node computation. We expect that in multi-node + applications, each node will be given its own data partition(s) to operate + on, with aggregation happening outside an IDataView pipeline. + +## Schema and Type System + +### Basics + +IDataView has general schema support, in that a view can have an arbitrary +number of columns, each having an associated name, index, data type, and +optional metadata. + +Column names are case sensitive. Multiple columns can share the same name, in +which case, one of the columns hides the others, in the sense that the name +will map to one of the column indices, the visible one. All user interaction +with columns should be via name, not index, so the hidden columns are +generally invisible to the user. However, hidden columns are often useful for +diagnostic purposes.
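+As a side illustration, here is a toy sketch of how name lookup can behave
+under hiding. This is hypothetical code, not the actual ML.NET `ISchema`
+implementation; which duplicate is visible is merely a convention (ML.NET's
+convention, discussed in the implementation notes, picks the largest index):
+
+```csharp
+// Toy model of column name hiding: two columns share the name "Features".
+// Lookup by name resolves to exactly one visible column.
+string[] columnNames = { "Label", "Features", "Features" };
+
+int LookupColumn(string name)
+{
+    for (int col = columnNames.Length - 1; col >= 0; col--)
+    {
+        if (columnNames[col] == name)
+            return col; // the visible column; earlier duplicates are hidden
+    }
+    return -1; // no column with this name
+}
+
+// LookupColumn("Features") == 2; column 1 remains present (and reachable by
+// index) for diagnostic tools, but ordinary name-based access never sees it.
+```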
+ +The set of supported column data types forms an open type system, in the sense +that additional types can be added at any time and in any assembly. However, +there is a precisely defined set of standard types including: + +* Text +* Boolean +* Single and Double precision floating point +* Signed integer values using 1, 2, 4, or 8 bytes +* Unsigned integer values using 1, 2, 4, or 8 bytes +* Unsigned 16 byte values for ids and probabilistically unique hashes +* Date time, date time zone, and timespan +* Key types +* Vector types + +The set of standard types will likely be expanded over time. + +The IDataView type system is specified in a separate document, *IDataView Type +System Specification*. + +IDataView provides a general mechanism for associating semantic metadata with +columns, such as designating sets of score columns, names associated with the +individual slots of a vector-valued column, values associated with a key type +column, whether a column's data is normalized, etc. + +While IDataView schema supports an arbitrary number of columns, it, like most +schematized data systems, is designed for a modest number of columns, +typically, limited to a few hundred. When a large number of *features* are +required, the features should be gathered into one or more vector-valued +columns, as discussed in the next section. This is important for both user +experience and performance. + +### Vector Types + +Machine learning and advanced analytics applications often involve +high-dimensional data. For example, a common technique for learning from text, +known as [bag-of-words](https://en.wikipedia.org/wiki/Bag-of-words_model), +represents each word in the text as a numeric feature containing the number of +occurrences of that word. Another technique is indicator or one-hot encoding +of categorical values, where, for example, a text-valued column containing a +person's last name is expanded to a set of features, one for each possible +name (Tesla, Lincoln, Gandhi, Zhang, etc.), with a value of one for the +feature corresponding to the name, and the remaining features having value +zero. Variations of these techniques use hashing in place of dictionary +lookup. With hashing, it is common to use 20 bits or more for the hash value, +producing `2^20` (about a million) features or more. + +These techniques typically generate an enormous number of features. +Representing each feature as an individual column is far from ideal, both from +the perspective of how the user interacts with the information and how the +information is managed in the schematized system. The solution is to represent +each set of features, whether indicator values, or bag-of-words counts, as a +single vector-valued column. + +A vector type specifies an item type and optional dimensionality information. +The item type must be a primitive, non-vector, type. The optional +dimensionality information specifies, at the basic level, the number of items +in the corresponding vector values. + +When the size is unspecified, the vector type is variable-length, and +corresponding vector values may have any length. A tokenization transform, +which maps a text value to the sequence of individual terms in that text, +naturally produces variable-length vectors of text. Then, a hashing ngram +transform may map the variable-length vectors of text to a bag-of-ngrams +representation, which naturally produces numeric vectors of length `2^k`, +where `k` is the number of bits used in the hash function.
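+As an illustrative sketch (plain C#, not an ML.NET transform), hashed
+bag-of-words maps a variable-length sequence of tokens to a fixed-length
+count vector of length `2^k`; the hash function here is just a stand-in:
+
+```csharp
+// Hashed bag-of-words: tokens in, fixed-length count vector out.
+// With bits = 20 this produces 2^20 (about a million) slots, which is why
+// such features belong in a single vector-valued column rather than in a
+// million individual columns.
+static float[] HashedBagOfWords(string[] tokens, int bits)
+{
+    var counts = new float[1 << bits]; // 2^bits slots
+    foreach (string token in tokens)
+    {
+        // Any deterministic hash works for illustration; ML.NET uses its
+        // own hashing, so GetHashCode is merely a convenient placeholder.
+        int slot = (token.GetHashCode() & int.MaxValue) % (1 << bits);
+        counts[slot] += 1; // count occurrences per hash bucket
+    }
+    return counts;
+}
+```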
+ +### Key Types + +The IDataView system includes the concept of key types. Key types are used for +data that is represented numerically, but where the order and/or magnitude of +the values is not semantically meaningful. For example, hash values, social +security numbers, and the index of a term in a dictionary are all best modeled +with a key type. + +## Components + +The IDataView system includes several standard kinds of components and the +ability to compose them to produce efficient data pipelines. A loader +represents a data source as an `IDataView`. A transform is applied to an +`IDataView` to produce a derived `IDataView`. A saver serializes the data +produced by an `IDataView` to a stream, in some cases in a format that can be +read by a loader. There are other more specific kinds of components defined +and used by the ML.NET code base, for example, scorers, evaluators, joins, and +caches. While there are several standard kinds of components, the set of +component kinds is open. + +### Transforms + +Transforms are a foundational kind of IDataView component. Transforms take an +IDataView as input and produce an IDataView as output. Many transforms simply +"add" one or more computed columns to their input schema. More precisely, +their output schema includes all the columns of the input schema, plus some +additional columns, whose values are computed from some of the input column +values. It is common for an added column to have the same name as an input +column, in which case, the added column hides the input column. Both the +original column and new column are present in the output schema and available +for downstream components (in particular, savers and diagnostic tools) to +inspect. For example, a normalization transform may, for each slot of a +vector-valued column named Features, apply an offset and scale factor and +bundle the results in a new vector-valued column, also named Features. From +the user's perspective (which is entirely based on column names), the Features +column was "modified" by the transform, but the original values are available +downstream via the hidden column. + +Some transforms require training, meaning that their precise behavior is +determined automatically from some training data. For example, normalizers and +dictionary-based mappers, such as the TermTransform, build their state from +training data. Training occurs when the transform is instantiated from +user-provided parameters. Typically, the transform behavior is later serialized. +When deserialized, the transform is not retrained; its behavior is entirely +determined by the serialized information. + +### Composition Examples + +Multiple primitive transforms may be applied to achieve higher-level +semantics. For example, ML.NET's `CategoricalTransform` is the composition of +two more primitive transforms, `TermTransform`, which maps each term to a key +value via a dictionary, and `KeyToVectorTransform`, which maps from key value +to indicator vector. Similarly, `CategoricalHashTransform` is the composition +of `HashTransform`, which maps each term to a key value via hashing, and +`KeyToVectorTransform`. + +Similarly, `WordBagTransform` and `WordHashBagTransform` are each the +composition of three transforms. `WordBagTransform` consists of +`WordTokenizeTransform`, `TermTransform`, and `NgramTransform`, while +`WordHashBagTransform` consists of `WordTokenizeTransform`, `HashTransform`, +and `NgramHashTransform`.
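+To make the composition concrete, here is a minimal sketch, using plain C#
+dictionaries rather than the actual ML.NET transform implementations, of the
+two primitive steps that `CategoricalTransform` composes:
+
+```csharp
+// Illustrative only: dictionary-based versions of the two primitive steps.
+// Requires System.Collections.Generic.
+sealed class TermMap
+{
+    private readonly Dictionary<string, int> _termToKey = new Dictionary<string, int>();
+
+    // TermTransform-like step: map each term to a key value via a dictionary.
+    public int GetKey(string term)
+    {
+        if (!_termToKey.TryGetValue(term, out int key))
+            _termToKey[term] = key = _termToKey.Count + 1; // keys start at 1; 0 means missing
+        return key;
+    }
+
+    // KeyToVectorTransform-like step: map a key value to an indicator vector.
+    public static float[] KeyToIndicator(int key, int cardinality)
+    {
+        var vector = new float[cardinality];
+        if (key >= 1 && key <= cardinality)
+            vector[key - 1] = 1; // one-hot: exactly one slot set
+        return vector;
+    }
+}
+```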
+ +## Cursoring + +### Row Cursor + +To access the data in a view, one gets a row cursor from the view by calling +the `GetRowCursor` method. The row cursor is a movable window onto a single +row of the view, known as the current row. The row cursor provides the column +values of the current row. The `MoveNext()` method of the cursor advances to +the next row. There is also a `MoveMany(long count)` method, which is +semantically equivalent to calling `MoveNext()` repeatedly, `count` times. + +Note that a row cursor is not thread safe; it should be used in a single +execution thread. However, multiple cursors can be active simultaneously on +the same or different threads. + +### Lazy Computation and Active Columns + +It is common in a data pipeline for a down-stream component to only require a +small subset of the information produced by the pipeline. For example, code +that needs to build a dictionary of all terms used in a particular text column +does not need to iterate over any other columns. Similarly, code to display +the first 100 rows does not need to iterate through all rows. When up-stream +computations are lazy, meaning that they are only performed when needed, these +scenarios execute significantly faster than when the up-stream computation is +eager (always performing all computations). + +The IDataView system enables and encourages components to be lazy in both +column and row directions. + +A row cursor has a set of active columns, determined by arguments passed to +`GetRowCursor`. Generally, the cursor, and any upstream components, will only +perform computation or data movement necessary to provide values of the active +columns. For example, when `TermTransform` builds its term dictionary from its +input `IDataView`, it gets a row cursor from the input view with only the term +column active. Any data loading or computation not required to materialize the +term column is avoided. This is lazy computation in the column direction. + +Generally, creating a row cursor is a very cheap operation. The expense is in +the data movement and computation required to iterate over the rows. If a +cursor is used to iterate over a small subset of the input rows, then +generally, only computation and data movement needed to materialize the +requested rows is performed. This is lazy computation in the row direction. + +### Immutability and Repeatability + +Cursoring through data does not modify input data in any way. The root data is +immutable, and the operations performed to materialize derived data are +repeatable. In particular, the values produced by two cursors constructed from +the same view with the same arguments to `GetRowCursor` will be identical. + +Immutability and repeatability enable transparent caching. For example, when a +learning algorithm or other component requires multiple passes over an +IDataView pipeline that includes non-trivial computation, performance may be +enhanced by either caching to memory or caching to disk. Immutability and +repeatability ensure that inserting caching is transparent to the learning +algorithm. + +Immutability also ensures that execution of a composed data pipeline graph is +safe for parallelism. Without the guarantee of immutability, nodes in a data +flow graph can produce side effects that are visible to other non-dependent +nodes. A system where multiple transforms worked by mutating data would be +impossible to predict or reason about, short of the gross inefficiency of +cloning of the source data to ensure consistency. 
+ +The IDataView system's immutability guarantees enable flexible scheduling +without the need to clone data. + +### Batch Parallel Cursoring + +The `GetRowCursor` method on `IDataView` includes options to allow or +encourage parallel execution. If the view is a transform that can benefit from +parallelism, it requests from its input view, not just a cursor, but a cursor +set. If that view is a transform, it typically requests from its input view a +cursor set, etc., on up the transformation chain. At some point in the chain +(perhaps at a loader), a component, called the splitter, determines how many +cursors should be active, creates those cursors, and returns them together +with a consolidator object. At the other end, the consolidator is invoked to +marshal the multiple cursors back into a single cursor. Intervening levels +simply create a cursor on each input cursor, return that set of cursors as +well as the consolidator. + +The ML.NET code base includes transform base classes that implement the +minimal amount of code required to support this batch parallel cursoring +design. Consequently, most transform implementations do not have any special +code to support batch parallel cursoring. + +### Memory Efficiency + +Cursoring is inherently efficient from a memory allocation perspective. +Executing `MoveNext()` requires no memory allocation. Retrieving primitive +column values from a cursor also requires no memory allocation. To retrieve +vector column values from a cursor, the caller can optionally provide buffers +into which the values should be copied. When the provided buffers are +sufficiently large, no additional memory allocation is required. When the +buffers are not provided or are too small, the cursor allocates buffers of +sufficient size to hold the values. This cooperative buffer sharing protocol +eliminates the need to allocate separate buffers for each row. To avoid any +allocation while iterating, client code only need allocate sufficiently large +buffers up front, outside the iteration loop. + +Note that IDataView allows algorithms that need to materialize data in memory +to do so. Nothing in the system prevents a component from cursoring through +the source data and building a complete in-memory representation of the +information needed, subject, of course, to available memory. + +### Data Size + +For large data scenarios, it is critical that the pipeline support efficient +multiple pass "streaming" from disk. IDataView naturally supports streaming +via cursoring through views. Typically, the root of a view is a loader that +pulls information from a file or other data source. We have implemented both +binary .idv and text-based loaders and savers. New loaders and savers can be +added at any time. + +Note that when the data is small, and repeated passes over the data are +needed, the operating system disk cache transparently enhances performance. +Further, when the data is known to fit in memory, caching, as described above, +provides even better performance. + +### Randomization + +Some training algorithms benefit from randomizing the order of rows produced +by a cursor. An `IDataView` indicates via a property whether it supports +shuffling. If it does, a random number generator passed to its `GetRowCursor` +method indicates shuffling should happen, with seed information pulled from +the random number generator. Serving rows from disk in a random order is quite +difficult to do efficiently (without seeking for each row). 
The binary .idv +loader has some shuffling support, favoring performance over attempting to +provide a uniform distribution over the permutation space. This level of +support has been validated to be sufficient for machine learning goals (e.g., +in recent work on the SA-SDCA algorithm). When the data is all in memory, as it is +when cached, randomizing is trivial. + +## Appendix: Comparison with LINQ + +This section is intended for developers familiar with the .Net +`IEnumerable<T>` interface and the LINQ technologies. + +The `IDataView` interface is, in some sense, similar to `IEnumerable<T>`, and +the IDataView system is similar to the LINQ eco-system. The comparisons below +refer to the `IDataView` and `IEnumerable<T>` interfaces as the core +interfaces of their respective worlds. + +In both worlds, there is a cursoring interface associated with the core +interface. In the IEnumerable world, the cursoring interface is +`IEnumerator<T>`. In the IDataView world, the cursoring interface is +`IRowCursor`. + +Both cursoring interfaces have `MoveNext()` methods for forward-only iteration +through the elements. + +Both cursoring interfaces provide access to information about the current +item. For the IEnumerable world, the access is through the `Current` property +of the enumerator. Note that when `T` is a class type, this suggests that each +item served requires memory allocation. In the IDataView world, there is no +single object that represents the current row. Instead, the values of the +current row are directly accessible via methods on the cursor. This avoids +memory allocation for each row. + +In both worlds, the item type information is carried by both the core +interface and the cursoring interface. In the IEnumerable world, this type +information is part of the .Net type, while in the IDataView world, the type +information is much richer and contained in the schema, rather than in the +.Net type. + +In both worlds, many different classes implement the core interface. In the +IEnumerable world, developers explicitly write some of these classes, but many +more implementing classes are automatically generated by the C# compiler, and +returned from methods written using the C# iterator functionality (`yield +return`). In the IDataView world, developers explicitly write all of the +implementing classes, including all loaders and transforms. Unfortunately, +there is no equivalent `yield return` magic. + +In both worlds, multiple cursors can be created and used. + +In both worlds, computation is naturally lazy in the row direction. In the +IEnumerable world, laziness in the column direction would correspond to the +returned `Current` value of type `T` lazily computing some of its properties. + +In both worlds, streaming from disk is naturally supported. + +Neither world supports indexed item access, nor a guarantee that the number of +items is available without iterating and counting. diff --git a/docs/code/IDataViewImplementation.md b/docs/code/IDataViewImplementation.md new file mode 100644 index 0000000000..63fe48b64d --- /dev/null +++ b/docs/code/IDataViewImplementation.md @@ -0,0 +1,518 @@ +# `IDataView` Implementation + +This document is intended as an essay on the best practices for `IDataView` +implementations. As a prerequisite, we suppose that someone has read, and +mostly understood, the following documents: + +* [Design principles](IDataViewDesignPrinciples.md) and +* [Type system](IDataViewTypeSystem.md).
+ +and has also read and understood the code documentation for the `IDataView` +and its attendant interfaces. Given that background, we will expand on best +practices and common patterns that go into a successful implementation of +`IDataView`, and motivate them with real examples and historical learnings. + +Put another way: There are now within the ML.NET codebase many implementations +of `IDataView` and many others in other related code bases that interface with +ML.NET. The corresponding PRs and discussions have resulted in the +accumulation of some information, stuff that is not and perhaps should not be +covered in the specification or XML code documentation, but that is +nonetheless quite valuable to know. That is, not the `IDataView` spec itself, +but many of the logical implications of that spec. + +We will here start with the idioms and practices for `IDataView` generally, +before launching into specific *types* of data views: right now there are two +types of data views that have risen to the dignity of being "general": loaders +and transforms. (There are many "specific" non-general data views: "array" +data views, cache data views, join data views, data views for taking other +abstractions for representing data and phrasing it in a way our code can +understand, but these do not follow any more general pattern as loaders and +transforms do.) + +# Urgency in Adhering to Invariants + +The point of `IDataView` is that it enables composable data pipelines. But +what does that composability, practically, entail? + +There are many implementations of `IDataView` and `IDataTransform` in the +ML.NET codebase. There are, further, many instances of `ITrainer` that consume +those data views. There are more implementations of these currently outside of +this codebase, totaling some hundreds. Astonishingly, they all actually work +well together. The reason why so many transforms can work well with so many +different dataviews as potential inputs, chained in arbitrary and strange ways +we can hardly imagine, and feed well into so many instances of `ITrainer` is +not of course because we wrote code to accommodate the Cartesian product of +all possible inputs, but merely because we assume that any given +implementation of `IDataView` obeys the invariants and principles it must. + +This is a general principle of software engineering, or indeed any +engineering: it is nearly impossible to build any complex system of multiple +parts unless those subcomponents adhere to whatever specifications they're +supposed to, and fulfill their requirements. + +We can to some extent tolerate divergence from the invariants in *some* +components, if they are isolated: we have some losses that behave strangely, +even trainers behave somewhat strangely, sort of. Yet `IDataView` is the +center of our data pipeline, and divergences are potentially more harmful. +There is, for every requirement listed here, actually *something* that is +relying on it. + +The inverse is also true: not only must `IDataView` conform to invariants, +code that consumes `IDataView` should be robust to situations other than the +"happy path." It needn't succeed, but it should at least be able to detect if +data is not in the expected form and fail with an error message telling the +user how they misused it. + +To give the most common example of what I have seen in PRs: often one designs +a transform or learner whose anticipated usage is that it will be used in +conjunction with another transform "upstream" to prepare the data.
(Again, +this is very common: a `KeyToVector` transform for example assumes there's +*something* upstream producing key values.) What happens sometimes is people +forget to check that the input data actually *does* conform to that, with the +result that if a pipeline were composed in some other fashion, there would be +some error. + +The only thing you can really assume is that an `IDataView` behaves "sanely" +according to the contracts of the `IDataView` interface, so that future ML.NET +developers can form some reasonable expectations of how your code behaves, and +also have a prayer of knowing how to maintain the code. It is hard enough to +write software correctly even when the code you're working with actually does +what it is supposed to, and impossible when it doesn't. Anyway, not to belabor +the point: hidden, undocumented, implicit requirements on the usage of a +component are something to avoid at all costs. + +# Design Decisions + +Presumably you are motivated to read this document because you have some +problem of how to get some data into ML.NET, or process data using ML.NET, or +something along these lines. There is a decision to be made about how to even +engineer a solution. Sometimes it's quite obvious: text featurization +obviously belongs as a transform. But other cases are *less* obvious. We will +talk here about how we think about these things. + +One crucial question is whether something should be a data view at all: Often +there is ambiguity. To give some examples of previously contentious points: +should clustering be a *transform* or a *trainer*? What about PCA? What about +LDA? In the end, we decided clustering was a *trainer* and both PCA and LDA +are *transforms*, but this decision was hardly unambiguous. Indeed, what +purpose is served by considering trainers and transforms fundamentally +different things, at all? + +Even once we decide whether something *should* be an `IDataView` of some sort, +the question remains what type of data view. We have some canonical types of +data views: + +If it involves taking data from a stream, like a file, or some sort of stream +of data from a network, or other such thing, we might consider this a +*loader*, that is, it should perhaps implement `IDataLoader`. + +If it involves taking a *single* data view, and transmuting it in some +fashion, **and** the intent is this same transmutation might be applied to +novel data, then it should perhaps implement `IDataTransform`, and be a +transform. + +Now then, consider that not everything should be a loader, or a transform, +even when data could be considered to be read from a stream, or when there is +a data view based on another single data view. The essential purpose of loaders +and transforms is that they can exist as part of the data model, that is, they +should be serializable and applicable to new data. A nice rule of thumb is: if, +when designing something, you can imagine a scenario where you want to apply some +logic to *both* a training set as well as a test set, then it might make sense +to make it a loader or a transform. If not, it probably does not make sense. + +1. Often data comes from some programmatic source, as a starting point for an + ML.NET pipeline. Despite being at the head of the data pipe, it is *not* a + loader, because the data source is not a stream (though it is stream*ing*): + it is a `RowSetDataView`. + +2. During training, data is sometimes cached. The structure that handles the + data caching is a `CacheDataView`. It is absolutely not a transform, + despite taking a single input and being itself an `IDataView`.
There is no + reason to make it a transform, because there is no plausible rationale to + make it part of the data model: the decision of whether you want to cache + data during *training* has nothing at all to do with whether you want to + cache data during *scoring*, so there is no point in saving it to the data + model. + +3. The ML.NET API for prediction uses a scheme that phrases input data + programmatically as coming from an enumerable of typed objects: the + underlying programmatic `IDataView` that is constructed to wrap this is + *not* a loader, because it is not part of the data model. It is merely the + entry point to the data model, at least, in typical usage. + +# Why `GetGetter`? + +Let us address something fairly conspicuous. The question almost everyone +asks, when they first start using `IDataView`: what is up with these getters? + +One does not fetch values directly from an `IRow` implementation (including +`IRowCursor`). Rather, one retains a delegate that can be used to fetch +objects, through the `GetGetter` method on `IRow`. This delegate is: + +```csharp +public delegate void ValueGetter<TValue>(ref TValue value); +``` + +If you are unfamiliar with delegates, [read +this](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/delegates/). +Anyway: you open a row cursor, you get the delegate through this `GetGetter` +method, and you use this delegate multiple times to fetch the actual column +values as you `MoveNext` through the cursor. + +Some history to motivate this: In the first version of `IDataView`, the +`IRowCursor` implementation did not actually have these "getters" but rather +had a method, `GetColumnValue<TValue>(int col, ref TValue val)`. However, this +has the following problems: + +* **Every** call had to verify that the column was active, +* **Every** call had to verify that `TValue` was of the right type, +* When these were part of, say, a transform in a chain (as they often are, + considering how commonly transforms are used by ML.NET's users) each access + would be accompanied by a virtual method call to the upstream cursor's + `GetColumnValue`. + +In contrast, consider the situation with these getter delegates. The +verification of whether the column is active happens *exactly* once. The +verification of types happens *exactly* once. Rather than *every* access being +passed up through a chain of dozens of transform cursors, you merely get a +getter from whatever cursor is serving it up, and do every access directly +without having to pass through umpteen virtual method calls (each, naturally, +accompanied by their own checks!). With these preliminaries done, a getter on +every iteration, when called, merely has to fill in the value: all this +verification work is already taken care of. The practical result of this is +that, for some workloads where the getters merely amounted to assigning +values, the "getter" method became an order of magnitude faster. So: we got +rid of this `GetColumnValue` method, and now work with `GetGetter`. + +# Repeatability + +A single `IDataView` instance should be considered a consistent view onto +data. So: if you open a cursor on the same `IDataView` instance, and access +values for the same columns, it will apparently be a "consistent" view. It is +probably obvious what this means, but specifically: + +The cursor as returned through `GetRowCursor` (with perhaps an identically +constructed `IRandom` instance) in any iteration should return the same number +of rows on all calls, and with the same values at each row.
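+For instance, here is a sketch of what this contract implies in practice,
+using the `GetRowCursor`/`GetGetter` pattern described above (assuming the
+`IRowCursor` and `ValueGetter<T>` shapes from this era of the codebase):
+
+```csharp
+// Reads all values of one column. By the repeatability contract, calling
+// this twice on the same IDataView instance must yield identical sequences.
+// Requires System.Collections.Generic.
+static List<float> ReadColumn(IDataView view, int col)
+{
+    var values = new List<float>();
+    // Open a cursor with only the requested column active.
+    using (IRowCursor cursor = view.GetRowCursor(c => c == col))
+    {
+        // Fetch the getter exactly once; reuse it for every row.
+        ValueGetter<float> getter = cursor.GetGetter<float>(col);
+        float value = default(float);
+        while (cursor.MoveNext())
+        {
+            getter(ref value); // cooperative pattern: fills the value in place
+            values.Add(value);
+        }
+    }
+    return values;
+}
+```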
+ +Why is this important? Many machine learning algorithms require multiple +passes over the dataset. Most stochastic methods wouldn't really care if the +data changed, but others are *very* sensitive to changes in the data. For +example, how could an L-BFGS or OWL-QN algorithm effectively compute its +approximation to a Hessian, if the examples from which the per-pass history +is computed were not consistent? How could a dual algorithm like SDCA +function with any accuracy, if the examples associated with any given dual +variable were to change? Consider even a relatively simple transform, like a +forward-looking windowed averager, or anything relating to time series. The +implementations of those `ICursor` interfaces often open *two* cursors on the +underlying `IDataView`, one "look ahead" cursor used to gather and calculate +necessary statistics, and another cursor to serve the actual data: how could the column +constructed out of that transform be meaningful if the look-ahead cursor was +consuming different data from the contemporaneous cursor? There are many +examples of this throughout the codebase. + +Nevertheless: in very specific circumstances we have relaxed this. For +example, some ML.NET API code serves up corrupt `IDataView` implementations +that have their underlying data change, since reconstituting a data pipeline +on fresh data is at the present moment too resource intensive. Nonetheless, +this is wrong: for example, the `TrainingCursorBase` and related subclasses +rely upon the data not changing. Since, however, that is used for *training*, +and the prediction engines of the API are used for *scoring*, we accept this. +However, this is not, strictly speaking, correct, and this sort of corruption +of `IDataView` should only be considered as a last resort, and only when some +great good can be accomplished through this. We certainly did not accept this +corruption lightly! + +# Norms for the Data Model + +In a similar vein to repeatability and consistency is the notion of the data +model. Unlike repeatability, this topic is a bit specialized: `IDataView` +specifically is not serializable, but both `IDataLoader` and `IDataTransform` +are serializable. Nonetheless those are the two most important types of data +views, so we will treat them here. + +From a user's perspective, when they run ML.NET and specify a loader or set of +transforms, what they are doing is composing a data pipe. For example, perhaps +they specify a way to load data from, say, a text file, apply some +normalization, some categorical handling, some text, some this, some that, +some everything, and it all just works, and is consistent whether we're +applying that to the training data on which the transforms were defined, or +some other test set, whether we programmatically load the model in the API and +apply it to some production setting, whether we are running in a distributed +environment and want to make sure *all* worker nodes are featurizing data in +exactly the same way, etc. etc. + +The way in which this consistency is accomplished is by having certain +requirements on the essential parts of the data model: loaders and transforms. +The essential reason these things exist is so that they can be applied to new +data in a consistent way. + +Let us formalize this somewhat.
We consider two data views to be functionally +identical if there is absolutely no way to distinguish them: they return the +same values, have the same types, same number of rows, they shuffle +identically given identically constructed `IRandom` instances when row cursors are +constructed, return the same ID for rows from the ID getter, etc. Obviously +this concept is transitive. (Of course, `Batch` in a cursor might be different +between the two, but that is the case even with two cursors constructed on the +same data view.) So some rules: + +1. If you have an `IDataLoader`, then saving/loading the associated data model + on the same data should result in a functionally identical `IDataLoader`. + +2. If you have an `IDataTransform`, then saving/loading the associated data + model for the transforms on functionally identical `IDataView`s, should + itself result in functionally identical `IDataView`s. + +## Versioning + +This requirement for consistency of a data model often has implications across +versions of ML.NET, and our requirements for data model backwards +compatibility. As time has passed, we often feel like it would make sense if a +transform behaved *differently*, that is, if it organized or calculated its +output in a different way than it currently does. For example, suppose we +wanted to switch the hash transform to something a bit more efficient than +murmur hashes. If we did so, presumably the same input values +would map to different outputs. We are free to do so, of course, yet: when we +deserialize a hash transform from before we made this change, that hash +transform should continue to output values as it did, before we made that +change. (This, of course, assumes that the transform was released as part of +a "blessed" non-preview point release of ML.NET. We can, and have, broken +backwards compatibility for something that has not yet been incorporated in +any sort of blessed release, though we prefer not to.) + +## What is Not Functionally Identical + +Note that identically *constructed* data views are not necessarily +*functionally* identical. Consider this usage of the train and score transform +with `xf=trainScore{tr=ap}`, where we first train an averaged perceptron, then +copy its score and probability columns out of the way, then construct the +same basic transform again. + +```maml +maml.exe showdata saver=md seed=1 data=breast-cancer.txt xf=trainScore{tr=ap} + xf=copy{col=ScoreA:Score col=ProbA:Probability} xf=trainScore{tr=ap} +``` + +The result is this.
+ +Label | Features | PredictedLabel | Score | Probability | ScoreA | ProbA +------|------------------------------|----------------|--------|--------------|--------|------- +0 | 5, 1, 1, 1, 2, 1, 3, 1, 1 | 0 | -62.07 | 0.0117 | -75.28 | 0.0107 +0 | 5, 4, 4, 5, 7, 10, 3, 2, 1 | 1 | 88.41 | 0.8173 | 92.04 | 0.8349 +0 | 3, 1, 1, 1, 2, 2, 3, 1, 1 | 0 | -40.53 | 0.0269 | -44.23 | 0.0329 +0 | 6, 8, 8, 1, 3, 4, 3, 7, 1 | 1 | 201.21 | 0.9973 | 208.07 | 0.9972 +0 | 4, 1, 1, 3, 2, 1, 3, 1, 1 | 0 | -43.11 | 0.0243 | -55.32 | 0.0221 +1 | 8, 10, 10, 8, 7, 10, 9, 7, 1 | 1 | 259.22 | 0.9997 | 257.43 | 0.9995 +0 | 1, 1, 1, 1, 2, 10, 3, 1, 1 | 1 | 71.10 | 0.6933 | 89.52 | 0.8218 +0 | 2, 1, 2, 1, 2, 1, 3, 1, 1 | 0 | -38.94 | 0.0286 | -39.59 | 0.0388 +0 | 2, 1, 1, 1, 2, 1, 1, 1, 5 | 0 | -32.87 | 0.0360 | -41.52 | 0.0362 +0 | 4, 2, 1, 1, 2, 1, 2, 1, 1 | 0 | -31.76 | 0.0376 | -41.68 | 0.0360 + +One could argue it's not *really* identically constructed, exactly, since both +of those transforms (including the underlying averaged perceptron learner!) +are initialized using the pseudo-random number generator in an `IHost` that +changes from one to another. But, that's a bit nit-picky. + +Note also: when we say functionally identical we include everything about it: +not just the data, but the schema, its metadata, the implementation of +shuffling, etc. For this reason, while serializing the data *model* has +guarantees of consistency, serializing the *data* has no such guarantee: if +you serialize data using the text saver, practically all metadata (except slot +names) will be completely lost, which can have implications for how some +transforms and downstream processes work. Or: if you serialize data using the +binary saver, suddenly it may become shufflable whereas it may not have been +before. + +The inevitable caveat to all this stuff about "consistency" is that it is +ultimately limited by hardware and other runtime environment factors: the +truth is, certain machines will, given identical programs with seemingly +identical flows of execution, *sometimes* result in subtly different answers +where floating point values are concerned. Even on the same machine there are +runtime considerations, e.g., when .NET's RyuJIT was introduced in VS2015, we +had lots of test failures around our model consistency tests because the JIT +was compiling the IL just *slightly* differently. But, this sort of thing +aside (which we can hardly help), we expect the models to be the same. + +# On Loaders, Data Models, and Empty `IMultiStreamSource`s + +When you create a loader you have the option of specifying not only *one* data +input, but any number of data input files, including zero. But there's also a +more general principle at work here with zero files: when deserializing a data +loader from a data model with an `IMultiStreamSource` with `Count == 0` (e.g., +as would be constructed with `new MultiFileSource(null)`), we have a protocol +that *every* `IDataLoader` should work in that circumstance, serving merely a +data view with no rows, but with the same schema as it had when it was serialized. +The purpose of this is that we often have circumstances where we need to +understand the schema of the data (what columns were produced, what the +feature names are, etc.) when all we have is the data model. (E.g., the +`savemodel` command, and other things.)
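+A sketch of the idea follows. `MultiFileSource` is the real class mentioned
+above, but the model-loading helper here is a hypothetical stand-in, since
+the exact deserialization entry point is outside the scope of this document:
+
+```csharp
+// Deserialize a data model with zero input files to recover the schema only.
+IMultiStreamSource noFiles = new MultiFileSource(null); // Count == 0
+IDataLoader loader = LoadLoaderFromModel(modelStream, noFiles); // hypothetical helper
+
+// Per the protocol above: no rows are served, but the schema is intact.
+ISchema schema = loader.Schema;
+for (int col = 0; col < schema.ColumnCount; col++)
+    Console.WriteLine($"{schema.GetColumnName(col)}: {schema.GetColumnType(col)}");
+```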
+ +# Getters Must Fail for Invalid Types + +For a given `IRow`, we must expect that `GetGetter<TValue>(col)` will throw if +either `IsColumnActive(col)` is `false`, or `typeof(TValue) != +Schema.GetColumnType(col).RawType`, as indicated in the code documentation. +But why? It might seem reasonable to add seemingly "harmless" flexibility to +this interface. So let's imagine your type should be `float`, because the +corresponding column's type's `RawType` is `typeof(float)`. Now: if you +*happen* to call `GetGetter<double>(col)` instead of `GetGetter<float>(col)`, +it would actually be a fairly easy matter for `GetGetter<double>` to actually +accommodate it, by doing the necessary transformations under the hood, and +*not* fail. This type of thinking is actually insidiously and massively +harmful to the codebase, as I will remark. + +The danger of writing code is that there's a chance someone might find it +useful. Imagine a consumer of your dataview actually relies on your +"tolerance." What that means, of course, is that this consuming code cannot +function effectively on any *other* dataview. The consuming code is by +definition *buggy*: it is requesting data of a type we've explicitly claimed, +through the schema, that we do not support. And the developer, through a +well-intentioned but misguided design decision, has allowed buggy code to pass a +test it should have failed, thus making the codebase more fragile, when, had we +simply maintained our requirements, we would have detected the bug. + +Moreover: it is a solution to a problem that does not exist. `IDataView`s are +fundamentally composable structures already, and one of the most fundamental +operations you can do is transform columns into different types. So, there is +no need for you to do the conversion yourself. Indeed, it is harmful for you +to try: if we have the conversion capability in one place, including the logic +of what can be converted and *how* these things are to be converted, is it +reasonable to suppose we should have it in *every* implementation of +`IDataView`? Certainly not. At best the situation will be needless complexity +in the code: more realistically it will lead to inconsistency, and from +inconsistency, surprises and bugs for users and developers. + +# Thread Safety + +Any `IDataView` implementation, as well as the `ISchema`, *must* be thread +safe. There is a lot of code that depends on this. For example, cross +validation works by operating over the same dataset (just, of course, filtered +to different subsets of the data). That amounts to multiple cursors being +opened, simultaneously, over the same data. + +So: `IDataView` and `ISchema` must be thread safe. However, `IRowCursor`, +being a stateful object, we assume is accessed from exactly one thread at a +time. The `IRowCursor`s returned through `GetRowCursorSet` are a different +matter: while each single one must be accessed by a single thread at a time, multiple +threads can access the set of cursors simultaneously: that's why we have that +method in the first place. + +# Exceptions and Errors + +There is one non-obvious implication of the lazy evaluation while cursoring +over an `IDataView`: while cursoring, you should almost certainly not throw +exceptions. + +Imagine you have a `TextLoader`. You might expect that if you have a parse +error, e.g., you have a column of floats, and one of the rows has a value +like, `"hi!"` or something otherwise uninterpretable, you would throw. Yet, +consider the implications of lazy evaluation.
+
+# Thread Safety
+
+Any `IDataView` implementation, as well as the `ISchema`, *must* be thread
+safe. There is a lot of code that depends on this. For example, cross
+validation works by operating over the same dataset (just, of course,
+filtered to different subsets of the data). That amounts to multiple cursors
+being opened, simultaneously, over the same data.
+
+So: `IDataView` and `ISchema` must be thread safe. However, `IRowCursor`,
+being a stateful object, we assume is accessed from exactly one thread at a
+time. The `IRowCursor`s returned through `GetRowCursorSet` are similar:
+while each single cursor must be accessed by only one thread at a time,
+multiple threads can access the set of cursors simultaneously: that's why we
+have that method in the first place.
+
+# Exceptions and Errors
+
+There is one non-obvious implication of the lazy evaluation while cursoring
+over an `IDataView`: while cursoring, you should almost certainly not throw
+exceptions.
+
+Imagine you have a `TextLoader`. You might expect that if you have a parse
+error, e.g., you have a column of floats, and one of the rows has a value
+like `"hi!"` or something otherwise uninterpretable, you would throw. Yet,
+consider the implications of lazy evaluation. If that column were not
+selected, the cursoring would *succeed*, because it would not look at that
+`"hi!"` token *at all*, much less detect that it was not parsable as a
+float.
+
+If we were to throw, the effect is that *sometimes* the cursoring will
+succeed (if the column is not selected), and *sometimes* it will fail (if it
+is selected). These failures are explainable, ultimately, of course, in the
+sense that anything is explainable, but a user knows nothing about lazy
+evaluation or anything like this: correspondingly this is enormously
+confusing.
+
+The implication is that we should not throw an exception in this case. We
+instead consider this value "missing," and we *may* register a warning using
+`IChannel.Warning`, but we cannot fail.
+
+So: if you could reasonably throw the exception on *any* cursoring over your
+`IDataView`, you can throw. If, however, detecting the condition on which
+you would throw the exception requires that a certain column be made active,
+then you should not throw. Of course, there are extreme circumstances: for
+example, one cannot help but throw during a cursoring if, say, there is some
+weird system event, and if one somehow detects in a subsequent iteration
+that something is fundamentally broken then one can throw: e.g., the binary
+loader will throw if it detects the file it is reading is corrupted, even if
+that corruption may not have been obvious immediately.
+
+# `GetGetter` Returning the Same Delegate
+
+On a single instance of `IRowCursor`, since each `IRowCursor` instance has
+no requirement to be thread safe, it is entirely legal for a call to
+`GetGetter` on a single column to just return the same getter delegate each
+time. It has come to pass that the majority of implementations of
+`IRowCursor` actually do that, since it is in some ways easier to write the
+code that way.
+
+This practice has inadvertently enabled a fairly attractive tool for
+analysis of data pipelines: by returning the same delegate each time, we can
+check in a data pipeline what data is being passed through by seeing whether
+the references to getter delegates are being passed through. Now this is
+imperfect, because some transforms that could use the same delegate each
+time do not, but the vast majority do.
+
+# Class Structuring
+
+The essential attendant classes of an `IDataView` are its schema, as
+returned through the `Schema` property, as well as the `IRowCursor`
+implementation(s), as returned through the `GetRowCursor` and
+`GetRowCursorSet` methods. The implementations for those two interfaces are
+typically nested within the `IDataView` implementation itself. The cursor
+implementation is almost always at the bottom of the data view class.
+
+# `IRow` and `ICursor` vs. `IRowCursor`
+
+We have `IRowCursor` which descends from both `IRow` and `ICursor`. Why do
+these other interfaces exist?
+
+Firstly, there are implementations of `IRow` or `ICursor` that are not
+`IRowCursor`s. We have occasionally found it useful to have something
+resembling a key-value store, but that is strongly, dynamically typed in
+some fashion. Why not simply represent this using the same idioms of
+`IDataView`? So we put them in an `IRow`. Similarly: we have several things
+that behave *like* cursors, but that are in no way *row* cursors.
+
+However, more than that, there are a number of utility functions where we
+want to operate over something like an `IRowCursor`, but we want to have
+some indication that this function will not move the cursor (in which case
+`IRow` is helpful), or that will not access any values (in which case
+`ICursor` is helpful).
+
+# Schema
+
+The schema contains information about the columns. As we see in [the design
+principles](IDataViewDesignPrinciples.md), each column has an index, a data
+type, and optional metadata.
+
+While *programmatic* accesses to an `IDataView` are by index, from a user's
+perspective columns are referenced by name; most training algorithms
+conceptually train on the `Features` column (under default settings). For
+this reason nearly all usages of an `IDataView` will be prefixed with a call
+to the schema's `TryGetColumnIndex`.
+
+Regarding name hiding, the principles mention that when multiple columns
+have the same name, other columns are "hidden." The convention all
+implementations of `ISchema` obey is that the column with the *largest*
+index is the visible one. Note however that this is merely convention, not
+part of the definition of `ISchema`.
+
+Implementations of `TryGetColumnIndex` should be O(1), that is, practically,
+this mapping ought to be backed with a dictionary in most cases. (There are
+obvious exceptions like, say, `LineLoader`, which produces exactly one
+column. There, a simple equality test suffices.)
+
+It is best if `GetColumnType` returns the *same* object every time. That is,
+things like key-types and vector-types, when returned, should not be created
+in the function itself (thereby creating a new object every time), but
+rather stored somewhere and returned.
+
+## Metadata
+
+Since metadata is *optional*, one is not obligated to necessarily produce
+it, or conform to any particular schemas for any particular kinds (beyond,
+say, the obvious things like making sure that the types and values are
+consistent). However, the flip side of that freedom given to *producers* is
+that *consumers* are obligated, when processing a data view input, to react
+gracefully when metadata of a certain kind is absent, or not in a form that
+one expects. One should *never* fail when input metadata is in a form one
+does not expect.
+
+To give a practical example of this: many transforms, learners, or other
+components that process `IDataView`s will do something with the slot names,
+but when the `SlotNames` metadata kind for a given column is either absent,
+*or* not of the right type (vectors of strings), *or* not of the right size
+(vectors of the same length as the input), the behavior is not to throw or
+yield errors or do anything of the kind, but to simply say, "oh, I don't
+really have slot names," and proceed as if the slot names hadn't been
+present at all.
\ No newline at end of file
diff --git a/docs/code/IDataViewTypeSystem.md b/docs/code/IDataViewTypeSystem.md
new file mode 100644
index 0000000000..c152a667cf
--- /dev/null
+++ b/docs/code/IDataViewTypeSystem.md
@@ -0,0 +1,843 @@
+# `IDataView` Type System
+
+## Overview
+
+The *IDataView system* consists of a set of interfaces and classes that
+provide efficient, compositional transformation of and cursoring through
+schematized data, as required by many machine-learning and data analysis
+applications. It is designed to gracefully and efficiently handle both
+extremely high dimensional data and very large data sets.
+It does not directly address distributed data, but is suitable for single
+node processing of data partitions belonging to larger distributed data
+sets.
+
+While `IDataView` is one interface in this system, colloquially, the term
+IDataView is frequently used to refer to the entire system. In this
+document, the specific interface is written using fixed pitch font as
+`IDataView`.
+
+IDataView is the data pipeline machinery for ML.NET. The ML.NET codebase has
+an extensive library of IDataView related components (loaders, transforms,
+savers, trainers, predictors, etc.). More are being worked on.
+
+The name IDataView was inspired by the database world, where the term table
+typically indicates a mutable body of data, while a view is the result of a
+query on one or more tables or views, and is generally immutable. Note that
+both tables and views are schematized, being organized into typed columns
+and rows conforming to the column types. Views differ from tables in several
+ways:
+
+* Views are immutable; tables are mutable.
+
+* Views are composable -- new views can be formed by applying
+  transformations (queries) to other views. Forming a new table from an
+  existing table involves copying data, making them decoupled—the new table
+  is not linked to the original table in any way.
+
+* Views are virtual; tables are fully realized/persisted.
+
+Note that immutability and compositionality are critical enablers of
+technologies that require reasoning over transformation, like query
+optimization and remoting. Immutability is also key for concurrency and
+thread safety.
+
+This document includes a very brief introduction to some of the basic
+concepts of IDataView, but then focuses primarily on the IDataView type
+system.
+
+Why does IDataView need a special type system? The .NET type system is not
+well suited to machine-learning and data analysis needs. For example, while
+one could argue that `typeof(double[])` indicates a vector of double values,
+it explicitly does not include the dimensionality of the vector/array.
+Similarly, there is no good way to indicate a subset of an integer type, for
+example integers from 1 to 100, as a .NET type. In short, there is no
+reasonable way to encode complete range and dimensionality information in a
+`System.Type`.
+
+In addition, a well-defined type system, including complete specification of
+standard data types and conversions, enables separately authored components
+to seamlessly work together without surprises.
+
+### Basic Concepts
+
+`IDataView`, in the narrow sense, is an interface implemented by many
+components. At a high level, it is analogous to the .Net interface
+`IEnumerable<T>`, with some very significant differences.
+
+While `IEnumerable<T>` is a sequence of objects of type `T`, `IDataView` is
+a sequence of rows. An `IDataView` object has an associated `ISchema` object
+that defines the `IDataView`'s columns, including their names, types,
+indices, and associated metadata. Each row of the `IDataView` has a value
+for each column defined by the schema.
+
+Just as `IEnumerable<T>` has an associated enumerator interface, namely
+`IEnumerator<T>`, `IDataView` has an associated cursor interface, namely
+`IRowCursor`. In the enumerable world, an enumerator object implements a
+`Current` property that returns the current value of the iteration as an
+object of type `T`. In the IDataView world, an `IRowCursor` object
+encapsulates the current row of the iteration. There is no separate object
+that represents the current row. Instead, the cursor implements methods that
+provide the values of the current row, when requested. Additionally, the
+methods that serve up values do not require memory allocation on each
+invocation, but use sharable buffers. This scheme significantly reduces the
+memory allocations needed to cursor through data.
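+
+As a hedged illustration of this style of access (`dataView` is a given
+`IDataView`, and the column name and `float` type are invented for the
+example), cursoring over one column looks roughly like this:
+
+```csharp
+// Open a cursor over just one column; the getter fills a shared buffer
+// rather than allocating a new value for each row.
+int col;
+if (!dataView.Schema.TryGetColumnIndex("Label", out col))
+    throw new InvalidOperationException("No Label column.");
+using (IRowCursor cursor = dataView.GetRowCursor(c => c == col))
+{
+    ValueGetter<float> getter = cursor.GetGetter<float>(col);
+    float value = default(float);
+    while (cursor.MoveNext())
+    {
+        getter(ref value); // re-fills the same buffer for each row
+        Console.WriteLine(value);
+    }
+}
+```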
+
+Both `IDataView` and `IEnumerable<T>` present a read-only view on data, in
+the sense that a sequence presented by each is not directly mutable.
+"Modifications" to the sequence are accomplished by additional operators or
+transforms applied to the sequence, so do not modify any underlying data.
+For example, to normalize a numeric column in an `IDataView` object, a
+normalization transform is applied to the sequence to form a new `IDataView`
+object representing the composition. In the new view, the normalized values
+are contained in a new column. Often, the new column has the same name as
+the original source column and "replaces" the source column in the new view.
+Columns that are not involved in the transformation are simply "passed
+through" from the source `IDataView` to the new one.
+
+Detailed specifications of the `IDataView`, `ISchema`, and `IRowCursor`
+interfaces are in other documents.
+
+### Column Types
+
+Each column in an `IDataView` has an associated column type. The collection
+of column types is open, in the sense that new code can introduce new column
+types without requiring modification of all `IDataView` related components.
+While introducing new types is possible, we expect it will also be
+relatively rare.
+
+All column type implementations derive from the abstract class `ColumnType`.
+Primitive column types are those whose implementation derives from the
+abstract class `PrimitiveType`, which derives from `ColumnType`.
+
+### Representation Type
+
+A column type has an associated .Net type, known as its representation type
+or raw type.
+
+Note that a column type often contains much more information than the
+associated .Net representation type. Moreover, many distinct column types
+can use the same representation type. Consequently, code should not assume
+that a particular .Net type implies a particular column type.
+
+### Standard Column Types
+
+There is a set of predefined standard column types, divided into standard
+primitive types and vector types. Note that there can be types that are
+neither primitive nor vector types. These types are not standard types and
+may require extra care when handling them. For example, a `PictureType`
+value might require disposing when it is no longer needed.
+
+Standard primitive types include the text type, the boolean type, numeric
+types, and key types. Numeric types are further split into floating-point
+types, signed integer types, and unsigned integer types.
+
+A vector type has an associated item type that must be a primitive type, but
+need not be a standard primitive type. Note that vector types are not
+primitive types, so vectors of vectors are not supported. Note also that
+vectors are homogeneous—all elements are of the same type. In addition to
+its item type, a vector type contains dimensionality information. At the
+basic level, this dimensionality information indicates the length of the
+vector type. A length of zero means that the vector type is variable length,
+that is, different values may have different lengths. Additional detail of
+vector types is in a subsequent section. Vector types are instances of the
+sealed class `VectorType`, which derives from `ColumnType`.
+
+This document uses convenient shorthand for standard types:
+
+* `TX`: text
+
+* `BL`: boolean
+
+* `R4`, `R8`: single and double precision floating-point
+
+* `I1`, `I2`, `I4`, `I8`: signed integer types with the indicated number of
+  bytes
+
+* `U1`, `U2`, `U4`, `U8`: unsigned integer types with the indicated number
+  of bytes
+
+* `UG`: unsigned type with 16 bytes, typically used as a unique ID
+
+* `TS`: timespan, a period of time
+
+* `DT`: datetime, a date and time but no timezone
+
+* `DZ`: datetime zone, a date and time with a timezone
+
+* `U4[100-199]`: A key type based on `U4` representing legal values from
+  100 to 199, inclusive
+
+* `V<R4, 3, 2>`: A vector type with item type `R4` and dimensionality
+  information [3,2]
+
+See the sections on the specific types for more detail.
+
+The IDataView system includes many standard conversions between standard
+primitive types. A later section contains a full specification of these
+conversions.
+
+### Default Value
+
+Each column type has an associated default value corresponding to the
+default value of its representation type, as defined by the .Net (C# and
+CLR) specifications.
+
+The standard conversions map source default values to destination default
+values. For example, the standard conversion from `TX` to `R8` maps the
+empty text value to the value zero. Note that the empty text value is
+distinct from the missing text value, as discussed next.
+
+### Missing Value
+
+Most of the standard primitive types support the notion of a missing value.
+In particular, the text type, floating-point types, signed integer types,
+and key types all have an internal representation of missing. We follow R's
+lead and denote such values as `NA`.
+
+Unlike R, the standard primitive types do not distinguish between missing
+and invalid. For example, in floating-point arithmetic, computing zero
+divided by zero, or infinity minus infinity, produces an invalid value known
+as a `NaN` (for Not-a-Number). R uses a specific `NaN` value to represent
+its `NA` value, with all other `NaN` values indicating invalid. The
+IDataView standard floating-point types do not distinguish between the
+various `NaN` values, treating them all as missing/invalid.
+
+A standard conversion from a source type with `NA` to a destination type
+with `NA` maps `NA` to `NA`. A standard conversion from a source type with
+`NA` to a destination type without `NA` maps `NA` to the default value of
+the destination type. For example, converting a text `NA` value to `R4`
+produces a `NaN`, but converting a text `NA` to `U4` results in zero. Note
+that this specification does not address diagnostic user messages, so, in
+certain environments, the latter situation may generate a warning to the
+user.
+
+Note that a vector type does not support a representation of missing, but
+may contain `NA` values of its item type. Generally, there is no standard
+mechanism faster than O(N) for determining whether a vector with N items
+contains any missing values.
+
+For further details on missing value representations, see the sections
+detailing the particular standard primitive types.
+
+### Vector Representations
+
+Values of a vector type may be represented either sparsely or densely. A
+vector type does not mandate denseness or sparsity, nor does it imply that
+one is favored over the other. A sparse representation is semantically
+equivalent to a dense representation having the suppressed entries filled in
+with the *default* value of the item type.
+Note that the values of the
+suppressed entries are emphatically *not* the missing/`NA` value of the item
+type, unless the missing and default values are identical, as they are for
+key types.
+
+### Metadata
+
+A column in an `ISchema` can have additional column-wide information, known
+as metadata. For each string value, known as a metadata kind, a column may
+have a value associated with that metadata kind. The value also has an
+associated type, which is a compatible column type.
+
+For example:
+
+* A column may indicate that it is normalized, by providing a `BL` valued
+  piece of metadata named `IsNormalized`.
+
+* A column whose type is `V<R4, 17>`, meaning a vector of length 17 whose
+  items are single-precision floating-point values, might have `SlotNames`
+  metadata of type `V<TX, 17>`, meaning a vector of length 17 whose items
+  are text.
+
+* A column produced by a scorer may have several pieces of associated
+  metadata, indicating the "scoring column group id" that it belongs to,
+  what kind of scorer produced the column (e.g., binary classification), and
+  the precise semantics of the column (e.g., predicted label, raw score,
+  probability).
+
+The `ISchema` interface, including the metadata API, is fully specified in
+another document.
+
+## Text Type
+
+The text type, denoted by the shorthand `TX`, represents text values. The
+`TextType` class derives from `PrimitiveType` and has a single instance,
+exposed as `TextType.Instance`. The representation type of `TX` is an
+immutable struct known as `DvText`. A `DvText` value represents a sequence
+of characters whose length is contained in its `Length` field. The
+missing/`NA` value has a `Length` of -1, while all other values have a
+non-negative `Length`. The default value has a `Length` of zero and
+represents an empty sequence of characters.
+
+In text processing transformations, it is very common to split text into
+pieces. A key advantage of using `DvText` instead of `System.String` for
+text values is that these splits require no memory allocation—the derived
+`DvText` references the same underlying `System.String` as the original
+`DvText` does. Another reason that `System.String` is not ideal for text is
+that we want the default value to be empty and not `NA`. For
+`System.String`, the default value is null, which would be a more natural
+representation for `NA` than for empty text. By using a custom struct
+wrapper around a portion (or span) of a `System.String`, we address both the
+memory efficiency and default value problems.
+
+## Boolean Type
+
+The standard boolean type, denoted by the shorthand `BL`, represents
+true/false values. The `BooleanType` class derives from `PrimitiveType` and
+has a single instance, exposed as `BooleanType.Instance`. The representation
+type of `BL` is the `DvBool` enumeration type, logically stored as `sbyte`:
+
+`DvBool` | `sbyte` Value
+--------:|:-------------
+`NA`     | -128
+`False`  | 0
+`True`   | 1
+
+The default value of `BL` is `DvBool.False` and the `NA` value of `BL` is
+`DvBool.NA`. Note that the underlying type of the `DvBool` `enum` is signed
+byte and the default and `NA` values of `BL` align with the default and `NA`
+values of `I1`.
+
+There is a standard conversion from `TX` to `BL`. There are standard
+conversions from `BL` to all signed integer and floating point numeric
+types, with `DvBool.False` mapping to zero, `DvBool.True` mapping to one,
+and `DvBool.NA` mapping to `NA`.
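+
+A minimal sketch of those semantics, assuming a `DvBool` declared per the
+table above (this is illustrative, not the actual library code):
+
+```csharp
+enum DvBool : sbyte { NA = -128, False = 0, True = 1 }
+
+static class BoolConversions
+{
+    // BL -> R4: False -> 0, True -> 1, NA -> NaN (the R4 NA value).
+    public static float ToR4(DvBool value)
+    {
+        switch (value)
+        {
+            case DvBool.False: return 0f;
+            case DvBool.True: return 1f;
+            default: return float.NaN;
+        }
+    }
+}
+```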
+
+## Number Types
+
+The standard number types are all instances of the sealed class
+`NumberType`, which is derived from `PrimitiveType`. There are two standard
+floating-point types, four standard signed integer types, and four standard
+unsigned integer types. Each of these is represented by a single instance of
+`NumberType` and there are static properties of `NumberType` to access each
+instance. For example, to test whether a variable type represents `I4`, use
+the C# code `type == NumberType.I4`.
+
+Floating-point arithmetic has a well-deserved reputation for being
+troublesome. This is primarily because it is imprecise, in the sense that
+the result of most operations must be rounded to the nearest representable
+value. This rounding means, among other side effects, that floating-point
+addition and multiplication are not associative, nor do they satisfy the
+distributive property.
+
+However, in many ways, floating-point arithmetic is the best-suited system
+for arithmetic computation. For example, the IEEE 754 specification mandates
+precise graceful overflow behavior—as results grow, they lose resolution in
+the least significant digits, and eventually overflow to a special infinite
+value. In contrast, when integer arithmetic overflows, the result is a
+nonsense value. Trapping and handling integer overflow is expensive, both in
+runtime and development costs.
+
+The IDataView system supports integer numeric types mostly for data
+interchange convenience, but we strongly discourage performing arithmetic on
+those values without first converting to floating-point.
+
+### Floating-point Types
+
+The floating-point types, `R4` and `R8`, have representation types
+`System.Single` and `System.Double`. Their default values are zero. Any
+`NaN` is considered an `NA` value, with the specific `Single.NaN` and
+`Double.NaN` values being the canonical `NA` values.
+
+There are standard conversions from each floating-point type to the other
+floating-point type. There are also standard conversions from text to each
+floating-point type and from each integer type to each floating-point type.
+
+### Signed Integer Types
+
+The signed integer types, `I1`, `I2`, `I4`, and `I8`, have representation
+types `System.SByte`, `System.Int16`, `System.Int32`, and `System.Int64`,
+respectively. The default value of each of these is zero. Each of these has
+a non-zero value that is its own additive inverse, namely `(-2)^^(8n-1)`,
+where `n` is the number of bytes in the representation type. This is the
+minimum value of each of these types. We follow R's lead and use these
+values as the `NA` values.
+
+There are standard conversions from each signed integer type to every other
+signed integer type. There are also standard conversions from text to each
+signed integer type and from each signed integer type to each floating-point
+type.
+
+Note that we have not defined standard conversions from floating-point types
+to signed integer types.
+
+### Unsigned Integer Types
+
+The unsigned integer types, `U1`, `U2`, `U4`, and `U8`, have representation
+types `System.Byte`, `System.UInt16`, `System.UInt32`, and `System.UInt64`,
+respectively. The default value of each of these is zero. These types do not
+have an `NA` value.
+
+There are standard conversions from each unsigned integer type to every
+other unsigned integer type. There are also standard conversions from text
+to each unsigned integer type and from each unsigned integer type to each
+floating-point type.
+
+Note that we have not defined standard conversions from floating-point types
+to unsigned integer types, or between signed integer types and unsigned
+integer types.
+
+## Key Types
+
+Key types are used for data that is represented numerically, but where the
+order and/or magnitude of the values is not semantically meaningful. For
+example, hash values, social security numbers, and the index of a term in a
+dictionary are all best modeled with a key type.
+
+The representation type of a key type, also called its underlying type, must
+be one of the standard four .Net unsigned integer types. The `NA` and
+default values of a key type are the same value, namely the representational
+value zero.
+
+Key types are instances of the sealed class `KeyType`, which derives from
+`PrimitiveType`.
+
+In addition to its underlying type, a key type specifies:
+
+* A count value, between `0` and `int.MaxValue`, inclusive
+
+* A "minimum" value, between `0` and `ulong.MaxValue`, inclusive
+
+* A Boolean value indicating whether the values of the key type are
+  contiguous
+
+Regardless of the minimum and count values, the representational value zero
+always means `NA` and the representational value one is always the first
+valid value of the key type.
+
+Notes:
+
+* The `Count` property returns the count of the key type. This is of type
+  `int`, but is required to be non-negative. When `Count` is zero, the key
+  type has no known or useful maximum value. Otherwise, the legal
+  representation values are from one up to and including `Count`. The
+  `Count` is required to be representable in the underlying type, so, for
+  example, the `Count` value of a key type based on `System.Byte` must not
+  exceed `255`. As an example of the usefulness of the `Count` property,
+  consider the `KeyToVector` transform implemented as part of ML.NET. It
+  maps from a key type value to an indicator vector. The length of the
+  vector is the `Count` of the key type, which is required to be positive.
+  For a key value of `k`, with `1 ≤ k ≤ Count`, the resulting vector has a
+  value of one in the (`k-1`)th slot, and zero in all other slots. An `NA`
+  value (with representation zero) is mapped to the all-zero vector of
+  length `Count`.
+
+* For a key type with positive `Count`, a representation value should be
+  between `0` and `Count`, inclusive, with `0` meaning `NA`. When processing
+  values from an untrusted source, it is best to guard against values bigger
+  than `Count` and treat such values as equivalent to `NA`.
+
+* The `Min` property returns the minimum semantic value of the key type.
+  This is used exclusively for transforming from a representation value,
+  where the valid values start at one, to user facing values, which might
+  start at any non-negative value. The most common values for `Min` are zero
+  and one.
+
+* The boolean `Contiguous` property indicates whether values of the key type
+  are generally contiguous in the sense that a complete sampling of
+  representation values of the key type would cover most, if not all, values
+  from one up to their max. A `true` value indicates that using an array to
+  implement a map from the key type values is a reasonable choice. When
+  `false`, it is likely more prudent to use a hash table.
+
+* A key type can be non-`Contiguous` only if `Count` is zero. The converse
+  however is not true. A key type that is contiguous but has `Count` equal
+  to zero is one where there is a reasonably small maximum, but that maximum
+  is unknown.
+  In this case, an array might be a good choice for a map from
+  the key type.
+
+* The shorthand for a key type with representation type `U1`, and semantic
+  values from `1000` to `1099`, inclusive, is `U1[1000-1099]`. Note that the
+  `Min` value of this key type is outside the range of the underlying type,
+  `System.Byte`, but the `Count` value is only `100`, which is representable
+  in a `System.Byte`. Recall that the representation values always start at
+  1 and extend up to `Count`, in this case `100`.
+
+* For a key type with representation type `System.UInt32` and semantic
+  values starting at `1000`, with no known maximum, the shorthand is
+  `U4[1000-*]`.
+
+There are standard conversions from text to each key type. This conversion
+parses the text as a standard non-negative integer value and honors the
+`Min` and `Count` values of the key type. If a parsed numeric value falls
+outside the range indicated by `Min` and `Count`, or if the text is not
+parsable as a non-negative integer, the result is `NA`.
+
+There are standard conversions from one key type to another, provided:
+
+* The source and destination key types have the same `Min` and `Count`
+  values.
+
+* Either the number of bytes in the destination's underlying type is greater
+  than the number of bytes in the source's underlying type, or the `Count`
+  value is positive. In the latter case, the `Count` is necessarily less
+  than `2^^k`, where `k` is the number of bits in the destination type's
+  underlying type. For example, `U1[1-*]` can be converted to `U2[1-*]`, but
+  `U2[1-*]` cannot be converted to `U1[1-*]`. Also, `U1[1-100]` and
+  `U2[1-100]` can be converted in both directions.
+
+## Vector Types
+
+### Introduction
+
+Vector types are one of the key innovations of the IDataView system and are
+critical for high dimensional machine-learning applications.
+
+For example, when processing text, it is common to hash all or parts of the
+text and encode the resulting hash values, first as a key type, then as
+indicator or bag vectors using the `KeyToVector` transform. Using a `k`-bit
+hash produces a key type with `Count` equal to `2^^k`, and vectors of the
+same length. It is common to use `20` or more hash bits, producing vectors
+of length a million or more. The vectors are typically very sparse. In
+systems that do not support vector-valued columns, each of these million or
+more values is placed in a separate (sparse) column, leading to a massive
+explosion of the column space. Most tabular systems are not designed to
+scale to millions of columns, and the user experience also suffers when
+displaying such data. Moreover, since the vectors are very sparse, placing
+each value in its own column means that, when a row is being processed, each
+of those sparse columns must be queried or scanned for its current value.
+Effectively the sparse matrix of values has been needlessly transposed. This
+is very inefficient when there are just a few (often one) non-zero entries
+among the column values. Vector types solve these issues.
+
+A vector type is an instance of the sealed `VectorType` class, which derives
+from `ColumnType`. The vector type contains its `ItemType`, which must be a
+`PrimitiveType`, and its dimensionality information. The dimensionality
+information consists of one or more non-negative integer values. The
+`VectorSize` is the product of the dimensions. A dimension value of zero
+means that the true value of that dimension can vary from value to value.
+
+For example, tokenizing a text by splitting it into multiple terms generates
+a vector of text of varying/unknown length. The result type shorthand is
+`V<TX>`. Hashing this using `6` bits then produces the vector type
+`V<U4[0-63]>`. Applying the `KeyToVector` transform then produces the vector
+type `V<R4, 0, 64>`. Each of these vector types has a `VectorSize` of zero,
+indicating that the total number of slots varies, but the latter still has
+potentially useful dimensionality information: the vector slots are
+partitioned into an unknown number of runs of consecutive slots each of
+length `64`.
+
+As another example, consider an image data set. The data starts with a `TX`
+column containing URLs for images. Applying an `ImageLoader` transform
+generates a column of a custom (non-standard) type, `Picture<*,*,4>`, where
+the asterisks indicate that the picture dimensions are unknown. The last
+dimension of `4` indicates that there are four channels in each pixel: the
+three color components, plus the alpha channel. Applying an `ImageResizer`
+transform scales and crops the images to a specified size, for example,
+`100x100`, producing a type of `Picture<100,100,4>`. Finally, applying an
+`ImagePixelExtractor` transform (and specifying that the alpha channel
+should be dropped), produces the vector type `V<R4, 3, 100, 100>`. In this
+example, the `ImagePixelExtractor` re-organized the color information into
+separate planes, and divided each pixel value by 256 to get pixel values
+between zero and one.
+
+### Equivalence
+
+Note that two vector types are equivalent when they have equivalent item
+types and have identical dimensionality information. To test for
+compatibility, instead of equivalence, in the sense that the total
+`VectorSize` should be the same, use the `SameSizeAndItemType` method
+instead of the `Equals` method (see the `ColumnType` code below).
+
+### Representation Type
+
+The representation type of a vector type is the struct `VBuffer<T>`, where
+`T` is the representation type of the item type. For example, the
+representation type of `V<R4, 17>` is `VBuffer<Single>`. When the vector
+type's `VectorSize` is positive, each value of the type will have length
+equal to the `VectorSize`.
+
+The struct `VBuffer<T>`, sketched below, provides both dense and sparse
+representations and encourages cooperative buffer sharing. A complete
+discussion of `VBuffer<T>` and associated coding idioms is in another
+document.
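+
+As an abridged sketch (the field names are as described in the notes that
+follow; everything else about the real struct, including its constructors
+and helpers, is elided here):
+
+```csharp
+public struct VBuffer<T>
+{
+    public readonly int Length;    // logical length of the vector
+    public readonly int Count;     // number of explicitly represented items
+    public readonly T[] Values;    // item values; only the first Count matter
+    public readonly int[] Indices; // parallel to Values when sparse
+
+    // Dense iff every logical slot is explicitly represented.
+    public bool IsDense { get { return Count == Length; } }
+}
+```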
+
+Notes:
+
+* `VBuffer<T>` contains four public readonly fields: `Length`, `Count`,
+  `Values`, and `Indices`.
+
+* `Length` is the logical length of the vector, and must be non-negative.
+
+* `Count` is the number of items explicitly represented in the vector.
+  `Count` is non-negative and less than or equal to `Length`.
+
+* When `Count` is equal to `Length`, the vector is dense. Otherwise, the
+  vector is sparse.
+
+* The `Values` array contains the explicitly represented item values. The
+  length of the `Values` array is at least `Count`, but not necessarily
+  equal to `Count`. Only the first `Count` items in `Values` are part of the
+  vector; any remaining items are garbage and should be ignored. Note that
+  when `Count` is zero, `Values` may be null.
+
+* The `Indices` array is only relevant when the vector is sparse. In the
+  sparse case, `Indices` is parallel to `Values`, only the first `Count`
+  items are meaningful, the indices must be non-negative and less than
+  `Length`, and the indices must be strictly increasing. Note that when
+  `Count` is zero, `Indices` may be null. In the dense case, `Indices` is
+  not meaningful and may or may not be null.
+
+* It is very common for the arrays in a `VBuffer<T>` to be larger than
+  needed for their current value. A special case of this is when a dense
+  `VBuffer<T>` has a non-null `Indices` array. The extra items in the arrays
+  are not meaningful and should be ignored. Allowing these buffers to be
+  larger than currently needed reduces the need to reallocate buffers for
+  different values. For example, when cursoring through a vector valued
+  column with `VectorSize` of 100, client code could pre-allocate values and
+  indices arrays and seed a `VBuffer<T>` with those arrays. When fetching
+  values, the client code passes the `VBuffer<T>` by reference. The called
+  code can re-use those arrays, filling them with the current values.
+
+* Generally, vectors should use a sparse representation only when the number
+  of non-default items is at most half the value of `Length`. However, this
+  guideline is not a mandate.
+
+See the full `IDataView` technical specification for additional details on
+`VBuffer<T>`, including complete discussion of programming idioms, and
+information on helper classes for building and manipulating vectors.
+
+## Standard Conversions
+
+The `IDataView` system includes the definition and implementation of many
+standard conversions. Standard conversions are required to map source
+default values to destination default values. When both the source type and
+destination type have an `NA` value, the conversion must map `NA` to `NA`.
+When the source type has an `NA` value, but the destination type does not,
+the conversion must map `NA` to the default value of the destination type.
+
+Most standard conversions are implemented by the singleton class
+`Conversions` in the namespace `Microsoft.MachineLearning.Data.Conversion`.
+The standard conversions are exposed by the `ConvertTransform`.
+
+### From Text
+
+There are standard conversions from `TX` to the standard primitive types,
+`R4`, `R8`, `I1`, `I2`, `I4`, `I8`, `U1`, `U2`, `U4`, `U8`, and `BL`. For
+non-empty, non-missing `TX` values, these conversions use standard parsing
+of floating-point and integer values. For `BL`, the mapping is case
+insensitive, maps text values `{ true, yes, t, y, 1, +1, + }` to
+`DvBool.True`, and maps the values `{ false, no, f, n, 0, -1, - }` to
+`DvBool.False`.
+
+If parsing fails, the result is the `NA` value for floating-point, signed
+integer types, and boolean, and zero for unsigned integer types. Note that
+overflow of an integer type is considered failure of parsing, so produces an
+`NA` (or zero for unsigned). These conversions map missing/`NA` text to
+`NA`, for floating-point and signed integer types, and to zero for unsigned
+integer types.
+
+These conversions are required to map empty text (the default value of `TX`)
+to the default value of the destination, which is zero for all numeric types
+and `DvBool.False` for `BL`. This may seem unfortunate at first glance, but
+leads to some nice invariants. For example, when loading a text file with
+sparse row specifications, it's desirable for the result to be the same
+whether the row is first processed entirely as `TX` values, then parsed, or
+processed directly into numeric values, that is, parsing as the row is
+processed. In the latter case, it is simple to map implicit items
+(suppressed due to sparsity) to zero. In the former case, these items are
+first mapped to the empty text value. To get the same result, we need empty
+text to map to zero.
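+
+A minimal sketch of the `TX` to `R4` rules above, assuming (for simplicity)
+that text arrives as a `string` with `null` standing in for the missing
+value rather than as a `DvText`:
+
+```csharp
+using System.Globalization;
+
+static class TextConversions
+{
+    public static float ToR4(string text)
+    {
+        if (text == null)     // missing text maps to NA (NaN for R4)
+            return float.NaN;
+        if (text.Length == 0) // empty text (the default value) maps to zero
+            return 0f;
+        float value;          // unparsable text also produces NA
+        return float.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out value)
+            ? value : float.NaN;
+    }
+}
+```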
+
+### Floating Point
+
+There are standard conversions from `R4` to `R8` and from `R8` to `R4`.
+These are the standard IEEE 754 conversions (using unbiased round-to-nearest
+in the case of `R8` to `R4`).
+
+### Signed Integer
+
+There are standard conversions from each signed integer type to each other
+signed integer type. These conversions map `NA` to `NA`, map any other
+numeric value that fits in the destination type to the corresponding value,
+and map any numeric value that does not fit in the destination type to `NA`.
+For example, when mapping from `I1` to `I2`, the source `NA` value, namely
+0x80, is mapped to the destination `NA` value, namely 0x8000, and all other
+numeric values are mapped as expected. When mapping from `I2` to `I1`, any
+value that is too large in magnitude to fit in `I1`, such as 312, is mapped
+to `NA`, namely 0x80.
+
+### Signed Integer to Floating Point
+
+There are standard conversions from each signed integer type to each
+floating-point type. These conversions map `NA` to `NA`, and map all other
+values according to the IEEE 754 specification using unbiased
+round-to-nearest.
+
+### Unsigned Integer
+
+There are standard conversions from each unsigned integer type to each other
+unsigned integer type. These conversions map any numeric value that fits in
+the destination type to the corresponding value, and map any numeric value
+that does not fit in the destination type to zero. For example, when mapping
+from `U2` to `U1`, any value that is too large in magnitude to fit in `U1`,
+such as 312, is mapped to zero.
+
+### Unsigned Integer to Floating Point
+
+There are standard conversions from each unsigned integer type to each
+floating-point type. These conversions map all values according to the IEEE
+754 specification using unbiased round-to-nearest.
+
+### Key Types
+
+There are standard conversions from one key type to another, provided:
+
+* The source and destination key types have the same `Min` and `Count`
+  values.
+
+* Either the number of bytes in the destination's underlying type is greater
+  than the number of bytes in the source's underlying type, or the `Count`
+  value is positive. In the latter case, the `Count` is necessarily less
+  than `2^^k`, where `k` is the number of bits in the destination type's
+  underlying type. For example, `U1[1-*]` can be converted to `U2[1-*]`, but
+  `U2[1-*]` cannot be converted to `U1[1-*]`. Also, `U1[1-100]` and
+  `U2[1-100]` can be converted in both directions.
+
+The conversion maps source representation values to the corresponding
+destination representation values. There are no special cases, because of
+the requirements above.
+
+### Boolean to Numeric
+
+There are standard conversions from `BL` to each of the signed integer and
+floating-point numeric types. These map `DvBool.True` to one, `DvBool.False`
+to zero, and `DvBool.NA` to the numeric type's `NA` value.
+
+## Type Classes
+
+This chapter contains information on the C# classes used to represent column
+types. Since the IDataView type system is extensible, this list describes
+only the core data types.
+
+### `ColumnType` Abstract Class
+
+The IDataView system includes the abstract class `ColumnType`. This is the
+base class for all column types. `ColumnType` has several convenience
+properties that simplify testing for common patterns. For example, the
+`IsVector` property indicates whether the `ColumnType` is an instance of
+`VectorType`. An abridged sketch of this surface follows; in the notes after
+it, the symbol `type` is a variable of type `ColumnType`.
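+
+This sketch is illustrative rather than the actual source; the bodies merely
+encode the semantics described in the notes below, with the concrete
+subclasses (described later in this chapter) overriding the virtual members.
+
+```csharp
+using System;
+
+public abstract class ColumnType
+{
+    public abstract Type RawType { get; } // the representation type
+
+    // Overridden by the concrete classes described later in this chapter:
+    // VectorType sets IsVector, NumberType sets IsNumber, KeyType sets
+    // IsKey and KeyCount, and so on.
+    public virtual bool IsVector { get { return false; } }
+    public virtual bool IsNumber { get { return false; } }
+    public virtual bool IsKey { get { return false; } }
+
+    // A key type's Count, or zero. A key type may itself have Count == 0
+    // (count unknown), so KeyCount == 0 does not imply "not a key type".
+    public virtual int KeyCount { get { return 0; } }
+
+    // For a vector type, its item type; otherwise, the type itself.
+    public virtual ColumnType ItemType { get { return this; } }
+
+    // Product of a vector type's dimensions; zero means "not a vector" or
+    // "vector of unknown/variable length".
+    public virtual int VectorSize { get { return 0; } }
+
+    public bool IsKnownSizeVector { get { return VectorSize > 0; } }
+    public int ValueCount { get { return IsVector ? VectorSize : 1; } }
+}
+```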
+
+* The `type.RawType` property indicates the representation type of the
+  column type. Its use should generally be restricted to constructing
+  generic type and method instantiations. In particular, testing whether
+  `type.RawType == typeof(int)` is not sufficient to test for the standard
+  `I4` type. The proper test is `type == NumberType.I4`, since there is a
+  single universal instance of the `I4` type.
+
+* Certain .Net types have a corresponding `DataKind` `enum` value. The value
+  of the `type.RawKind` property is consistent with `type.RawType`. For .Net
+  types that do not have a corresponding `DataKind` value, the
+  `type.RawKind` property returns zero. The `type.RawKind` property is
+  particularly useful when switching over raw type possibilities, but only
+  after testing for the broader kind of the type (key type, numeric type,
+  etc.).
+
+* The `type.IsVector` property is equivalent to `type is VectorType`.
+
+* The `type.IsNumber` property is equivalent to `type is NumberType`.
+
+* The `type.IsText` property is equivalent to `type is TextType`. There is a
+  single instance of the `TextType`, so this is also equivalent to `type ==
+  TextType.Instance`.
+
+* The `type.IsBool` property is equivalent to `type is BoolType`. There is a
+  single instance of the `BoolType`, so this is also equivalent to `type ==
+  BoolType.Instance`.
+
+* The `type.IsKey` property is equivalent to `type is KeyType`.
+
+* If `type` is a key type, then `type.KeyCount` is the same as
+  `((KeyType)type).Count`. If `type` is not a key type, then `type.KeyCount`
+  is zero. Note that a key type can have a `Count` value of zero, indicating
+  that the count is unknown, so `type.KeyCount` being zero does not imply
+  that `type` is not a key type. In summary, `type.KeyCount` is equivalent
+  to: `type is KeyType ? ((KeyType)type).Count : 0`.
+
+* The `type.ItemType` property is the item type of the vector type, if
+  `type` is a vector type, and is the same as `type` otherwise. For example,
+  to test for a type that is either `TX` or a vector of `TX`, one can use
+  `type.ItemType.IsText`.
+
+* The `type.IsKnownSizeVector` property is equivalent to `type.VectorSize >
+  0`.
+
+* The `type.VectorSize` property is zero if either `type` is not a vector
+  type or if `type` is a vector type of unknown/variable length. Otherwise,
+  it is the length of vectors belonging to the type.
+
+* The `type.ValueCount` property is one if `type` is not a vector type and
+  the same as `type.VectorSize` if `type` is a vector type.
+
+* The `Equals` method returns whether the types are semantically equivalent.
+  Note that for vector types, this requires the dimensionality information
+  to be identical.
+
+* The `SameSizeAndItemType` method is the same as `Equals` for non-vector
+  types. For vector types, it returns true iff the two types have the same
+  item type and have the same `VectorSize` values. For example, for the two
+  vector types `V<R4, 3, 2>` and `V<R4, 6>`, `Equals` returns false but
+  `SameSizeAndItemType` returns true.
+
+### `PrimitiveType` Abstract Class
+
+The `PrimitiveType` abstract class derives from `ColumnType` and is the base
+class of all primitive type implementations.
+
+### `TextType` Sealed Class
+
+The `TextType` sealed class derives from `PrimitiveType` and is a
+singleton-class for the standard text type. The instance is exposed by the
+static `TextType.Instance` property.
+
+### `BooleanType` Sealed Class
+
+The `BooleanType` sealed class derives from `PrimitiveType` and is a
+singleton-class for the standard boolean type. The instance is exposed by
+the static `BooleanType.Instance` property.
+
+### `NumberType` Sealed Class
+
+The `NumberType` sealed class derives from `PrimitiveType` and exposes
+single instances of each of the standard numeric types, `R4`, `R8`, `I1`,
+`I2`, `I4`, `I8`, `U1`, `U2`, `U4`, `U8`, and `UG`.
+
+### `DateTimeType` Sealed Class
+
+The `DateTimeType` sealed class derives from `PrimitiveType` and is a
+singleton-class for the standard datetime type. The instance is exposed by
+the static `DateTimeType.Instance` property.
+
+### `DateTimeZoneType` Sealed Class
+
+The `DateTimeZoneType` sealed class derives from `PrimitiveType` and is a
+singleton-class for the standard datetime timezone type. The instance is
+exposed by the static `DateTimeZoneType.Instance` property.
+
+### `TimeSpanType` Sealed Class
+
+The `TimeSpanType` sealed class derives from `PrimitiveType` and is a
+singleton-class for the standard timespan type. The instance is exposed by
+the static `TimeSpanType.Instance` property.
+
+### `KeyType` Sealed Class
+
+The `KeyType` sealed class derives from `PrimitiveType` and instances
+represent key types.
+
+Notes:
+
+* Two key types are considered equal iff their kind, min, count, and
+  contiguous values are the same.
+
+* The static `IsValidDataKind` method returns true iff kind is `U1`, `U2`,
+  `U4`, or `U8`. These are the only valid underlying data kinds for key
+  types.
+
+* The inherited `KeyCount` property returns the same value as the `Count`
+  property.
+
+### `VectorType` Sealed Class
+
+The `VectorType` sealed class derives from `ColumnType` and instances
+represent vector types. The item type is specified as the first parameter to
+each constructor and the dimension information is inferred from the
+additional parameters.
+
+* The `DimCount` property indicates the number of dimensions and the
+  `GetDim` method returns a particular dimension value. All dimension values
+  are non-negative integers. A dimension value of zero indicates unknown (or
+  variable) in that dimension.
+
+* The `VectorSize` property returns the product of the dimensions.
+
+* The `IsSubtypeOf(VectorType other)` method returns true if this is a
+  subtype of `other`, in the sense that they have the same item type, and
+  either have the same `VectorSize` or `other.VectorSize` is zero.
+
+* The inherited `Equals` method returns true if the two types have the same
+  item type and the same dimension information.
+
+* The inherited `SameSizeAndItemType(ColumnType other)` method returns true
+  if `other` is a vector type with the same item type and the same
+  `VectorSize` value.
diff --git a/docs/code/IdvFileFormat.md b/docs/code/IdvFileFormat.md
new file mode 100644
index 0000000000..4009eed726
--- /dev/null
+++ b/docs/code/IdvFileFormat.md
@@ -0,0 +1,191 @@
+# IDV File Format
+
+This document describes ML.NET's binary dataview file format, version
+1.1.1.5, written by the `BinarySaver` and `BinaryLoader` classes, commonly
+known as the `.idv` format.
+
+## Goal of the Format
+
+A dataview is a collection of columns, over some number of rows. (Do not
+confuse columns with features. Columns can be and often are vector valued,
+and it is expected though not required that commonly all features will be
+together in one vector valued column.)
+
+The actual values are stored in blocks.
+A block holds values for a single
+column across multiple rows. Block format is dictated by a codec. There is a
+table-of-contents and lookup table to facilitate quasi-random access to
+particular blocks. (Quasi in the sense that you can only seek to a block,
+not to a particular row within a block.)
+
+## General Data Format
+
+Before we discuss the format itself we will establish some conventions on
+how individual scalar values, strings, and other data are serialized. All
+basic pieces of data (e.g., a single number, or a single string) are encoded
+in ways reflecting the semantics of the .NET `BinaryWriter` class, those
+semantics being:
+
+* All numbers are stored as little-endian, using their natural fixed-length
+  binary encoding.
+
+* Strings are stored using an unsigned
+  [LEB128](https://en.wikipedia.org/wiki/LEB128) number describing the
+  number of bytes, followed by that many bytes containing the UTF-8 encoded
+  string.
+
+A note about this: LEB128 is a simple encoding to encode arbitrarily large
+integers. Each byte of 8-bits follows this convention. The most significant
+bit is 0 if and only if this is the end of the LEB128 encoding. The
+remaining 7 bits are a part of the number being encoded. The bytes are
+stored little-endian, that is, the first byte holds the 7 least significant
+bits, the second byte (if applicable) holds the next 7 least significant
+bits, etc., and the last byte holds the 7 most significant bits. LEB128 is
+used in one or two places in this format. (I might tend to prefer use of
+LEB128 in places where we are writing values that, on balance, we expect to
+be relatively small, and only in cases where there is no potential benefit
+from random access to the associated stream, since LEB128 is incompatible
+with random access. However, this is not formulated into anything
+approaching a definite policy.)
+
+## Header
+
+Every binary instance stream has a header composed of 256 bytes, at the
+start of the stream. Not all bytes are used. Those bytes that are not
+explicitly used have undefined content, and can have anything in them. We
+strongly encourage writers of this format to insert obscene messages in this
+dead space. The content is defined as follows (the offsets being where each
+field starts).
+
+Offsets | Type | Name and Description
+--------|-------|---------------------
+0 | ulong | **Signature**: The magic number of this file.
+8 | ulong | **Version**: Indicates the version of the data file.
+16 | ulong | **CompatibleVersion**: Indicates the minimum reader version that can interpret this file, possibly with some data loss.
+24 | long | **TableOfContentsOffset**: The offset to the column table of contents structure.
+32 | long | **TailOffset**: The eight-byte tail signature starts at this offset. So, the entire dataset stream should be considered to have byte length of eight plus this value.
+40 | long | **RowCount**: The number of rows in this data file.
+48 | int | **ColumnCount**: The number of columns in this data file.
+
+Notes on these:
+
+* The signature of this file is `0x00425644004C4D43`, which is, when written
+  little-endian to a file, `CML DVB ` with null characters in the place of
+  spaces. These letters are intended to suggest "CloudML DataView Binary."
+
+* The tail signature is the byte-reversed version of this, that is,
+  `0x434D4C0044564200`.
+
+* Versions are encoded as four 16-bit unsigned numbers packed into a single
+  ulong, with higher order bits being a more major version. The first
+  supported version of the format is 1.1.1.4, that is,
+  `0x0001000100010004`. (Versions prior to 1.1.1.4 did exist, but were not
+  released, so we do not support them, though we do describe them in this
+  document for the sake of completeness.)
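+
+A hedged sketch of that version packing (the four field names are labels
+invented for the example):
+
+```csharp
+static class IdvVersion
+{
+    // Four 16-bit fields packed into a ulong, higher order fields being
+    // more major; e.g. version 1.1.1.4 packs to 0x0001000100010004.
+    public static ulong Pack(ushort major, ushort minor, ushort build, ushort revision)
+    {
+        return ((ulong)major << 48) | ((ulong)minor << 32)
+             | ((ulong)build << 16) | revision;
+    }
+}
+```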
+
+## Table of Contents Format
+
+The table of contents are packed entries, with there being as many entries
+as there are columns. The version field here indicates the versions where
+that entry is written. ≥ indicates the field occurred in versions after and
+including that version, = indicates the field occurs only in that version.
+
+Description | Entry Type | Version
+------------|------------|--------
+Column name | string | ≥1.1.1.1
+Codec loadname | string | ≥1.1.1.1
+Codec parameterization length | LEB128 integer | ≥1.1.1.1
+Codec parameterization, which must have precisely the length indicated above | arbitrary, but with specified length | ≥1.1.1.1
+Compression kind | CompressionKind (byte) | ≥1.1.1.1
+Rows per block in this column | LEB128 integer | ≥1.1.1.1
+Lookup table offset | long | ≥1.1.1.1
+Slot names offset, or 0 if this column has no slot names (for version 1.1.1.2, behave as if there are no slot names, with this having value 0) | long | =1.1.1.3
+Slot names byte size (present only if slot names offset is greater than 0) | long | =1.1.1.3
+Slot names count (present only if slot names offset is greater than 0) | int | =1.1.1.3
+Metadata table of contents offset, or 0 if there is no metadata (1.1.1.4) | long | ≥1.1.1.4
+
+For those working in the ML.NET codebase: The three `Codec` fields are
+handled by the `CodecFactory.WriteCodec/TryReadCodec` methods, with the
+definition stream being at the start of the codec loadname on entry, and at
+the end of the codec parameterization on exit, in both the success and
+failure cases.
+
+The `CompressionKind` enum is described below, and describes the compression
+algorithm used to compress blocks.
+
+### Compression Kind
+
+The enum for compression kind is one byte, and follows this scheme:
+
+Compression Kind | Code
+---------------------------------------------------------------|-----
+None | 0
+DEFLATE (i.e., [RFC1951](http://www.ietf.org/rfc/rfc1951.txt)) | 1
+zlib (i.e., [RFC1950](http://www.ietf.org/rfc/rfc1950.txt)) | 2
+
+None means no compression. DEFLATE is the default scheme. There is a
+tendency to conflate zlib and DEFLATE, so to be clear: zlib can be (somewhat
+inexactly) considered a wrapped version of DEFLATE, but it is still a
+distinct (but closely related) format. However, both are implemented by the
+zlib library, which is probably the source of the confusion.
+
+## Metadata Table of Contents Format
+
+The metadata table of contents begins with a LEB128 integer describing the
+number of entries. (Should be a positive value, since if a column has no
+metadata the expectation is that the offset for the metadata TOC will be
+stored as 0.) What follows that are that many packed entries. Each entry is
+somewhat akin to the column table of contents entry, with some
+simplifications considering that there will be exactly one "block" with one
+item.
+
+Description | Entry Type
+-------------------------------------------------------|------------
+Metadata kind | string
+Codec loadname | string
+Codec parameterization length | LEB128 integer
+Codec parameterization, which must have precisely the length indicated above | arbitrary, but with specified length
+Compression kind | CompressionKind (byte)
+Offset of the block where the metadata item is written | long
+Byte length of the block | LEB128 integer
+
+The "block" written is written in exactly the same format as the main
+content blocks. This will be very slightly inefficient as that scheme is
+written to accommodate many entries, but I don't expect that to be much of a
+burden.
+
+## Lookup Table Format
+
+Each table of contents entry is associated with a lookup table starting at
+the indicated lookup table offset. It is written as packed binary, with each
+lookup entry consisting of 16 bytes. So in all, the lookup table takes 16
+bytes, times the total number of blocks for this column.
+
+Description | Entry Type
+----------------------------------------------------------|-----------
+Block offset, position in the file where the block starts | long
+Block length, its size in bytes in the file | int
+Uncompressed block length, its size in bytes if the block bytes were decompressed according to the column's compression codec | int
+
+## Slot Names
+
+If slot names are stored, they are stored as integer index/string pairs. As
+many pairs are stored as the count of slot names present in the table of
+contents entry. Note that this only appeared in version 1.1.1.3. With
+1.1.1.4 and later, slot names were just considered yet another piece of
+metadata.
+
+Description | Entry Type
+------------------|-----------
+Index of the slot | int
+The slot name | string
+
+## Block Format
+
+Columns are ordered into blocks, with each block holding the binary encoded
+values for one particular column across a range of rows. So for example, if
+the column's table of contents describes it as having 1000 rows per block,
+the first block will contain the values for the column for rows 0 through
+999, the second block rows 1000 through 1999, etc., with all blocks
+containing the same number of rows, except the last block, which will
+contain fewer items (unless the number of rows just so happens to be a
+multiple of the block size).
+
+Each block is a possibly compressed sequence of bytes, compressed according
+to the compression type field in the table of contents. It begins and ends
+at the offsets indicated in the corresponding lookup table entry (or, for
+metadata blocks, in the metadata table of contents entry). The uncompressed
+bytes will be stored in the format as described by the codec.
diff --git a/docs/code/KeyValues.md b/docs/code/KeyValues.md
new file mode 100644
index 0000000000..ced135761d
--- /dev/null
+++ b/docs/code/KeyValues.md
@@ -0,0 +1,149 @@
+# Key Values
+
+Most commonly, key-values are used to encode items where it is convenient or
+efficient to represent values using numbers, but you want to maintain the
+logical "idea" that these numbers are keys indexing some underlying,
+implicit set of values, in a way more explicit than simply mapping to a
+number would allow you to do.
+
+A more formal description of key values and types is
+[here](IDataViewTypeSystem.md#key-types). *This* document's motivation is
+less to describe what key types and values are, and more to instead describe
+why key types are necessary and helpful things to have. Necessarily, this
+document is more anecdotal in its descriptions, to motivate its content.
Let's take a few examples of transforms that produce keys:

* The `TermTransform` forms a dictionary of unique observed values to a key. The key type's count indicates the number of items in the set, and through the `KeyValues` metadata "remembers" what each key represents.

* The `HashTransform` performs a hash of input values, and produces a key value with count equal to the range of the hash function, which, if a b-bit hash is used, will be 2ᵇ.

* The `CharTokenizeTransform` takes input strings and produces key values representing the characters observed in the string.

## Keys as Intermediate Values

Explicitly invoking transforms that produce key values, and using those key values, is sometimes helpful. However, given that most trainers expect the feature vector to be a vector of floating point values and *not* keys, in typical usage keys mostly serve as some sort of intermediate value on the way to that final feature vector. (Unless, say, one is doing something like preparing labels for a multiclass learner.)

So why not go directly to the feature vector, and forget this key stuff? Actually, to take text as the canonical example, we used to. However, by structuring the transforms from, say, text to key to vector, rather than text to vector *directly*, we are able to simplify a lot of code on the implementation side, which is both less code for us to maintain, and also gives users consistency in behavior.

So, for example, the `CharTokenize` above might appear to be a strange choice: *why* represent characters as keys? The reason is that the n-gram transform is written to ingest keys, not text, and so we can use the same transform for both the n-gram featurization of words, as well as n-char grams.

Now, much of this complexity is hidden from the user: most users will just use the `text` transform, select some options for n-grams and char-grams, and not be aware of these internal invisible keys. Similarly, a user might use the categorical or categorical hash transforms, without knowing that internally each is just the term or hash transform followed by a `KeyToVector` transform. But keys are still there, and it would be impossible to really understand ML.NET's featurization pipeline without understanding keys. Any user who wants to understand how, say, the text transform resulted in a particular featurization will have to inspect the key values to get that understanding.

## Keys are not Numbers

As an actual CLR data type, key values are stored as some form of unsigned integer (most commonly `uint`). The most common confusion that arises from this is to ascribe too much importance to the fact that it is a `uint`, and think these are somehow just numbers. This is incorrect.

For keys, the concept of order and difference has no inherent, real meaning as it does for numbers, or at least, the meaning is different and highly domain dependent. Consider a numeric `U4` type with values `0`, `1`, and `2`. The difference between `0` and `1` is `1`, and the difference between `1` and `2` is `1`, because they're numbers. Very well: now consider that you train a term transform over the input tokens `apple`, `pear`, and `orange`: this will also map to the keys logically represented as the numbers `0`, `1`, and `2` respectively. Yet for a key, is the difference between keys `0` and `1`, `1`? No: the difference is that `0` maps to `apple` and `1` to `pear`.
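To make this concrete, here is a minimal sketch of a term-map-like dictionary. This is illustrative only, not ML.NET's actual `TermTransform` code, and the variable names are hypothetical:

```csharp
using System.Collections.Generic;

// Builds a toy "term map" over observed tokens: each unique value is
// assigned the next key, in order of first appearance.
var terms = new[] { "apple", "pear", "orange", "pear" };
var map = new Dictionary<string, uint>();
foreach (var term in terms)
{
    if (!map.ContainsKey(term))
        map[term] = (uint)map.Count; // logical keys 0, 1, 2, ...
}

// map["pear"] - map["apple"] == 1, but that "1" is meaningless: the keys
// are indices into the set { apple, pear, orange }, not quantities.
```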
Order, similarly, doesn't mean one key is somehow "larger": it just means we saw one before another -- or something else entirely, if sorting by value happened to be selected.

Also: ML.NET's vectors can be sparse. Implicit entries in a sparse vector are assumed to have the `default` value for that type -- that is, implicit values for numeric types will be zero. But what would the implicit default value for a key value be? Take the `apple`, `pear`, and `orange` example above: it would be inappropriate for the default value to be `0`, because that would mean the implicit value is `apple`, and there is nothing about `apple` that makes it an appropriate default. The only really appropriate "default" choice is that the value is unknown, that is, missing.

An implication of this is that there is a distinction between the logical value of a key-value, and the actual physical value of the value in the underlying type. This will be covered more later.

## As an Enumeration of a Set: `KeyValues` Metadata

While keys can be used for many purposes, they are often used to enumerate items from some underlying set. In order to map keys back to this original set, many transforms producing key values will also produce `KeyValues` metadata associated with that output column.

Valid `KeyValues` metadata is a vector of length equal to the count of the type of the column. This can be of varying types: it is often text, but does not need to be. For example, a `term` transform applied to a column would have `KeyValues` metadata of item type equal to the item type of the input data.

How this metadata is used downstream depends on the purposes of whoever is consuming it, but common uses are: in multiclass classification, determining the human readable class names, or, if used in featurization, determining the names of the features.

Note that `KeyValues` metadata is optional, and sometimes is not even sensible. For example, consider a clustering algorithm: the prediction of the cluster of an example would be a key value. So, for example, if there were five clusters, then the prediction would indicate the cluster by a value of type `U4<0-4>`. Yet these clusters were found by the algorithm itself, and they have no natural descriptions.

## Actual Implementation

This may be of use only to writers or extenders of ML.NET, or users of our API. How key values are presented *logically* to users of ML.NET is distinct from how they are actually stored *physically* in actual memory, both in ML.NET source and through the API. For key values:

* All key values are stored as unsigned integers.
* The missing key value is always stored as `0`. See the note above about the default value, to see why this must be so.
* Valid non-missing key values are stored from `1` onwards, irrespective of whatever we claim in the key type the minimum value is.

So when, in the prior example, the term transform maps `apple`, `pear`, and `orange` seemingly to `0`, `1`, and `2`, values of `U4<0-2>`, in reality, if you were to fire up the debugger you would see that they are stored as `1`, `2`, and `3`, with unrecognized values being mapped to the "default" missing value of `0`.

Nevertheless, we almost never talk about this, no more than we would talk about our "strings" really being implemented as string slices: this is purely an implementation detail, relevant only to people working with key values at the source level. To a regular non-API user of ML.NET, key values appear *externally* to be simply values, just as strings appear to be simply strings, and so forth.
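As a sketch of the physical scheme just described (hypothetical helper names, not an ML.NET API):

```csharp
// Physical encoding of key values: logical key i is stored as i + 1, and
// physical 0 is reserved for the missing value.
static uint ToPhysical(uint logicalKey) => checked(logicalKey + 1);

static bool TryGetLogical(uint physical, out uint logicalKey)
{
    // Physical 0 is "missing"; any other value recovers the logical key.
    logicalKey = physical - 1;
    return physical != 0;
}
```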
There is another implication: a hypothetical type `U1<4000-4002>` is actually a sensible type in this scheme. The `U1` indicates that it is stored in one byte, which would on first glance seem to conflict with values like `4000`, but remember that the first valid key-value is stored as `1`, and we've identified the valid range as spanning the three values 4000 through 4002. That is, `4000` would be represented physically as `1`.

The reality cannot be seen by any conventional means I am aware of, save for viewing ML.NET's workings in the debugger or using the API and inspecting these raw values yourself: that `4000` you would see is really stored as the `byte` `1`, `4001` as `2`, `4002` as `3`, and a missing value stored as `0`.
\ No newline at end of file
diff --git a/docs/code/VBufferCareFeeding.md b/docs/code/VBufferCareFeeding.md
new file mode 100644
index 0000000000..1de7239dc6
--- /dev/null
+++ b/docs/code/VBufferCareFeeding.md
@@ -0,0 +1,270 @@
# `VBuffer` Care and Feeding

The `VBuffer<T>` is ML.NET's central vector type, used throughout our data pipeline and many other places to represent vectors of values. For example, nearly all trainers accept feature vectors as `VBuffer<float>`.

## Technical `VBuffer`s

A `VBuffer<T>` is a generic type that supports both dense and sparse vectors over items of type `T`. This is the representation type for all [`VectorType`](IDataViewTypeSystem.md#vector-representations) instances in the `IDataView` ecosystem. When an instance of this is passed to a row cursor getter, the callee is free to take ownership of and re-use the arrays (`Values` and `Indices`).

A `VBuffer<T>` is a struct, and has the following `readonly` fields:

* `int Length`: The logical length of the buffer.

* `int Count`: The number of items explicitly represented. This equals `Length` when the representation is dense and is less than `Length` when sparse.

* `T[] Values`: The values. Only the first `Count` of these are valid.

* `int[] Indices`: The indices. For a dense representation, this array is not used, and may be `null`. For a sparse representation it is parallel to `Values` and specifies the logical indices for the corresponding values. Only the first `Count` of these are valid.

`Values` must have length equal to at least `Count`. If the representation is sparse, that is, `Count < Length`, then `Indices` must also have length greater than or equal to `Count`. If `Count == 0`, then it is entirely legal for `Values` or `Indices` to be `null`, and if dense then `Indices` can always be `null`.

On the subject of `Count == 0`, note that having no valid values in `Indices` and `Values` merely means that no values are explicitly defined, and the vector should be treated, logically, as being filled with `default(T)`.

For sparse vectors, `Indices` must have length equal to at least `Count`, and the first `Count` indices must be increasing, with all indices between `0` inclusive and `Length` exclusive.

Regarding the generic type parameter `T`, the only real assumption made about this type is that assignment (that is, using `=`) is sufficient to create an *independent* copy of that item. All representation types of the [primitive types](IDataViewTypeSystem.md#standard-column-types) have this property (e.g., `DvText`, `DvInt4`, `Single`, `Double`, etc.), but, for example, `VBuffer<>` itself does not have this property. So, no `VBuffer` of `VBuffer`s for you.
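The invariants above can be captured in a small validation sketch. This is illustrative code over the fields just described, not part of ML.NET's API:

```csharp
// Checks the structural invariants of a VBuffer-like (length, count,
// values, indices) quadruple, as described in the text above.
static bool IsValid<T>(int length, int count, T[] values, int[] indices)
{
    if (count < 0 || count > length)
        return false;
    if (count > 0 && (values == null || values.Length < count))
        return false;
    if (count < length) // sparse: indices must be valid and increasing
    {
        if (count > 0 && (indices == null || indices.Length < count))
            return false;
        for (int i = 0; i < count; ++i)
        {
            if (indices[i] < 0 || indices[i] >= length)
                return false;
            if (i > 0 && indices[i] <= indices[i - 1])
                return false;
        }
    }
    return true;
}
```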
## Sparse Values as `default(T)`

Any implicit value in a sparse `VBuffer<T>` **must** logically be treated as though it has value `default(T)`. For example, suppose we have the following two declarations:

```csharp
var a = new VBuffer<float>(5, new float[] { 0, 1, 0, 0, 2 });
var b = new VBuffer<float>(5, 2, new float[] { 1, 2 }, new int[] { 1, 4 });
```

Here, `a` is dense, and `b` is sparse. However, any operations over either must treat the logical indices `0`, `2`, and `3` as if they have value `0.0f`. The two should be equivalent!

ML.NET throughout its codebase assumes in many places that sparse and dense representations are interchangeable: if it is more efficient to consider something sparse or dense, the code will have no qualms about making that conversion. This does mean, though, that we depend upon all code that deals with `VBuffer`s responding in the same fashion, and respecting this convention.

As a corollary to the above note about equivalence of sparse and dense representations: since they are equivalent, it follows that any code consuming `VBuffer`s must work equally well with *both*. That is, there must never be a condition where data is read and assumed to be either sparse or dense, since implementers of `IDataView` and related interfaces are perfectly free to produce either.

The only "exception" to this rule is a necessary acknowledgment of the reality of floating point mathematics: due to the way the JIT will optimize one code path or another, and due to the fact that floating point math is not associative, operations over sparse `VBuffer<float>` or `VBuffer<double>` vectors can sometimes yield modestly different results than the "same" operation over dense values.

## Why Buffer Reuse

The question is often asked by people new to this codebase: why bother with buffer reuse at all? Without going into too many details, we used to not, and suffered for it. We had a far simpler system where examples were yielded through an [`IEnumerable<>`](https://msdn.microsoft.com/en-us/library/9eekhta0.aspx), and our vector type at the time had `Indices` and `Values` arrays as well, but their sizes were their actual sizes, and, being returned through an `IEnumerable<>`, there was no plausible way to "recycle" the buffers.

Also: who "owned" a fetched example (the caller, or callee) was not clear. Because it was not clear, code was inevitably written and checked in that made *either* assumption, which meant, ultimately, that everything that touched these would duplicate everything by default, because doing anything else would fail in some case.

The reason why this becomes important is that [garbage collection](https://msdn.microsoft.com/en-us/library/0xy59wtx.aspx) in the .NET framework is not free. Creating and destroying these arrays *can* be cheap, provided that they are sufficiently small, short lived, and only ever exist in a single thread. But violate any of these, and there is a possibility these arrays could be allocated on the large object heap, or promoted to gen-2 collection. The results could be disastrous: in one particularly memorable incident regarding neural net training, the move to `IDataView` and its `VBuffer`s resulted in a more than tenfold improvement in runtime performance, because under the old regime the garbage collection of the feature vectors was just taking so much time.

This is somewhat unfortunate: a joke-that's-not-really-a-joke on the team was that we were writing C# as though it were C code.
Be that as it may, buffer reuse is essential to our performance, especially on larger problems.

This design requirement of buffer reuse has deeper implications for the ecosystem than merely this type. For example, it is one crucial reason why so many value accessors in the `IDataView` ecosystem fill in values passed in through a `ref` parameter, rather than, say, being a return value.

## Buffer Re-use as a User

Let's imagine we have an `IDataView` in a variable `dataview`, and we just so happen to know that the column with index 5 has representation type `VBuffer<float>`. (In real code, this would presumably be achieved through more complicated code involving an inspection of `dataview.Schema`, but we omit such details here.)

```csharp
using (IRowCursor cursor = dataview.GetRowCursor(col => col == 5))
{
    ValueGetter<VBuffer<float>> getter = cursor.GetGetter<VBuffer<float>>(5);
    var value = default(VBuffer<float>);
    while (cursor.MoveNext())
    {
        getter(ref value);
        // Presumably something else is done with value.
    }
}
```

In this example, we open a cursor (telling it to make only column 5 active), then get the "getter" over this column. What enables buffer re-use is that, as we go row by row over the data with the `while` loop, we pass the same `value` variable in to the `getter` delegate, again and again. Presumably the first time, or several, memory is allocated. Initially `value = default(VBuffer<float>)`, that is, it has zero `Length` and `Count` and `null` `Indices` and `Values`. Presumably at some point, probably the first call, `value` is replaced with a `VBuffer<float>` that has actual values allocated. In subsequent calls, perhaps these are judged insufficiently large, and new arrays are allocated, but we would expect the arrays to become "large enough" at some point to accommodate many values, so reallocations would become increasingly rare.

A common mistake made by first time users is to do something like move the `var value` declaration inside the `while` loop, thus dooming `getter` to allocate the arrays every single time, completely defeating the purpose of buffer reuse.

## Buffer Re-use as a Developer

Nearly all methods in ML.NET that "return" a `VBuffer<T>` do not really return a `VBuffer<T>` *at all*, but instead have a parameter `ref VBuffer<T> dst`, where they are expected to put the result. See the above example, with the `getter`. A `ValueGetter` is defined:

```csharp
public delegate void ValueGetter<TValue>(ref TValue value);
```

Let's describe the typical practice of "returning" a `VBuffer` in, say, a `ref` parameter named `dst`: if `dst.Indices` and `dst.Values` are sufficiently large to contain the result, they are used, and the value is calculated, or sometimes copied, into them. If either is insufficiently large, then a new array is allocated in its place. After all the calculation happens, a *new* `VBuffer` is constructed and assigned to `dst` (possibly, if they were large enough, using the same `Indices` and `Values` arrays as were passed in, albeit with different values).

`VBuffer`s can be either sparse or dense. However, even when returning a dense `VBuffer`, you would not discard the `Indices` array of the passed-in buffer, assuming there was one. The `Indices` array was merely larger than necessary to store *this* result: that you happened not to need it this call does not justify throwing it away. We don't care about buffer re-use just for a single call, after all!
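A minimal sketch of this pattern, assuming the `VBuffer<T>` fields and constructors described in this document (this is not a verbatim copy of any ML.NET method):

```csharp
// "Returns" a copy of src through ref dst, re-using dst's arrays when
// they are large enough, per the convention described above.
static void CopyTo<T>(ref VBuffer<T> src, ref VBuffer<T> dst)
{
    var values = dst.Values;
    var indices = dst.Indices;
    if (src.Count > 0)
    {
        if (values == null || values.Length < src.Count)
            values = new T[src.Count];
        Array.Copy(src.Values, values, src.Count);
    }
    if (src.Count < src.Length)
    {
        // Sparse: the indices are meaningful and must be copied too.
        if (indices == null || indices.Length < src.Count)
            indices = new int[src.Count];
        Array.Copy(src.Indices, indices, src.Count);
        dst = new VBuffer<T>(src.Length, src.Count, values, indices);
    }
    else
    {
        // Dense: indices are unused this call, but hand the array back
        // through the dense constructor so it stays available for re-use.
        dst = new VBuffer<T>(src.Length, values, indices);
    }
}
```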
Indeed, the dense constructor for the `VBuffer` accepts an `Indices` array for precisely this reason!

Also note: when you return a `VBuffer` in this fashion, the caller is assumed to *own* it at that point. This means they can do whatever they like to it, like pass the same variable into some other getter, or modify its values. Indeed, this is quite common: normalizers in ML.NET get values from their source, then immediately scale the contents of `Values` appropriately. This would hardly be possible if the callee were considered to have some stake in that result.

There is a corollary on this point: because the caller owns any `VBuffer`, you shouldn't do anything that irrevocably destroys its usefulness to the caller. For example, consider this method, which takes a vector `src` and stores the scaled result in `dst`:

```csharp
VectorUtils.ScaleBy(ref VBuffer<float> src, ref VBuffer<float> dst, float c)
```

What this does is copy the values from `src` to `dst`, while scaling each value seen by `c`.

One possible alternate (wrong) implementation would be to just say `dst = src`, then scale all contents of `dst.Values` by `c`. But then `dst` and `src` would share references to their internal arrays, completely compromising the caller's ability to do anything useful with them: if the caller were to pass `dst` into some other method that modified it, this could easily (silently!) modify the contents of `src`. The point is: if you are writing code *anywhere* whose end result is that two distinct `VBuffer` structs share references to their internal arrays, you've almost certainly introduced a **nasty** pernicious bug for your users.

## Utilities for Working with `VBuffer`s

ML.NET's runtime code has a number of utilities for operating over `VBuffer`s that we have written to be generally useful. We will not treat them in detail here, but:

* `Microsoft.ML.Runtime.Data.VBuffer<T>` itself contains a few methods for accessing and iterating over its values.

* `Microsoft.ML.Runtime.Internal.Utilities.VBufferUtils` contains utilities mainly for non-numeric manipulation of `VBuffer`s.

* `Microsoft.ML.Runtime.Numeric.VectorUtils` contains math operations over `VBuffer<float>` and `float[]`, like computing norms, dot-products, and whatnot.

* `Microsoft.ML.Runtime.Data.BufferBuilder<T>` is an abstract class whose concrete implementations are used throughout ML.NET to build up `VBuffer<T>` instances. Note that if one *can* simply build a `VBuffer` oneself easily, and does not need the niceties provided by the buffer builder, one should probably just do so.

* `Microsoft.ML.Runtime.Internal.Utilities.Utils.EnsureSize` is often useful to ensure that the arrays are of the right size.

## Golden Rules

Here are some golden rules to remember:

Remember the conditions under which `Indices` and `Values` can be `null`! A developer forgetting that `null` values for these fields are legal is probably the most common error in our code. (And, unfortunately, one that sometimes takes a while to pop up: most users don't feed empty inputs to our trainers.)

In terms of accessing anything in `Values` or `Indices`, remember: treat `Count` as the real length of these arrays, not their actual length.
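These two rules in action, as a minimal sketch (illustrative, not an ML.NET utility):

```csharp
// Sums a VBuffer<float>, treating Count as the real length. When
// Count == 0, the loop body never runs, so a null Values array is safe.
static float Sum(ref VBuffer<float> buffer)
{
    float sum = 0;
    for (int i = 0; i < buffer.Count; ++i)
        sum += buffer.Values[i];
    // Implicit entries of a sparse vector are zero, so they contribute
    // nothing: the same code serves dense and sparse representations.
    return sum;
}
```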
If you write code that results in two distinct `VBuffer`s sharing references to their internal arrays (e.g., there are two `VBuffer`s `a` and `b`, with `a.Indices == b.Indices` and `a.Indices != null`, or `a.Values == b.Values` and `a.Values != null`), then you've almost certainly done something wrong.

Structure your code so that `VBuffer`s have their buffers re-used as much as possible. If you have code called repeatedly where you are passing in some `default(VBuffer<T>)`, there's almost certainly an opportunity there.

When re-using a `VBuffer` that's been passed to you, remember that even when constructing a dense vector, you should still re-use the `Indices` array that was passed in.
\ No newline at end of file
diff --git a/docs/release-notes/0.1/release-0.1.md b/docs/release-notes/0.1/release-0.1.md
index def4723a31..a36055527a 100644
--- a/docs/release-notes/0.1/release-0.1.md
+++ b/docs/release-notes/0.1/release-0.1.md
@@ -13,7 +13,7 @@ dotnet add package Microsoft.ML
 From package manager:
 ```
-Install-Package Microsoft.ML
+Install-Package Microsoft.ML
 ```
 Or from within Visual Studio's NuGet package manager.
diff --git a/run.cmd b/run.cmd
index be63160b5e..616c7cc018 100644
--- a/run.cmd
+++ b/run.cmd
@@ -11,7 +11,7 @@ set DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1
 set DOTNET_MULTILEVEL_LOOKUP=0
 :: Restore the Tools directory
-call %~dp0init-tools.cmd
+call "%~dp0init-tools.cmd"
 if NOT [%ERRORLEVEL%]==[0] exit /b 1
 set _toolRuntime=%~dp0Tools
@@ -21,8 +21,8 @@ set _json=%~dp0config.json
 :: run.exe depends on running in the root directory, notably because the config.json specifies
 :: a relative path to the binclash logger
-pushd %~dp0
-call %_dotnet% %_toolRuntime%\run.exe "%_json%" %*
+pushd "%~dp0"
+call "%_dotnet%" "%_toolRuntime%\run.exe" "%_json%" %*
 popd
 exit /b %ERRORLEVEL%
\ No newline at end of file
diff --git a/src/Microsoft.ML.Core/Data/ICursor.md b/src/Microsoft.ML.Core/Data/ICursor.md
new file mode 100644
index 0000000000..403107acc6
--- /dev/null
+++ b/src/Microsoft.ML.Core/Data/ICursor.md
@@ -0,0 +1,174 @@
# `ICursor` Notes

This document includes some more in-depth notes on some expert topics for `ICursor` implementations.

## `Batch`

Some cursorable implementations, like `IDataView`, can, through `GetRowCursorSet`, return a set of parallel cursors that partition the sequence of rows that would normally have been returned through a plain old `GetRowCursor`, just sharded into multiple cursors. These cursors can be accessed across multiple threads to enable parallel evaluation of a data pipeline. This is key for data pipeline performance.

However, even though the data pipeline can perform this parallel evaluation, at the end of this parallelization we usually ultimately want to recombine the separate threads' streams back into a single stream. This is accomplished through `Batch`.

So, to review what actually happens in ML.NET code: multiple cursors are returned through a method like `IDataView.GetRowCursorSet`. Operations can happen on top of these cursors -- most commonly, transforms creating new cursors on top of them -- and the `IRowCursorConsolidator` implementation will utilize this `Batch` field to "reconcile" the multiple cursors back down into one cursor.

It may help to first understand this process intuitively, to understand `Batch`'s requirements: when we reconcile the outputs of multiple cursors, the consolidator will take the set of cursors, and it will find the one with the "lowest" `Batch` ID.
(This must be uniquely determined: no two cursors in a set should ever return the same `Batch` value.) It will iterate on that cursor until the `Batch` ID changes, whereupon the consolidator will find the cursor with the next lowest batch ID (which should, of course, be greater than the `Batch` value we were just iterating on).

Put another way: suppose we call `GetRowCursor` (possibly with an `IRandom` instance) and store all the values from the rows of that cursoring in some list, in order. Now imagine we call `GetRowCursorSet` (with an identically constructed `IRandom` instance), and store the values from the rows of all of those cursorings in a different list, in order, accompanied by their `Batch` value. Then, if we were to perform a *stable* sort on the second list keyed by the stored `Batch` value, it should have content identical to the first list.

So: `Batch` is a `long` value associated with every `ICounted` implementation (including implementations of `ICursor`). This quantity must be non-decreasing as we call `MoveNext` or `MoveMany`: it is fine for the `Batch` to repeat the same batch value within the same cursor (though not across cursors from the same set), but any change in the value must be an increase.

The requirement of consistency is for one cursor, or the cursors from a *single* call to `GetRowCursor` or `GetRowCursorSet`. It is not required that the `Batch` be consistent among multiple independent cursorings.

## `MoveNext` and `MoveMany`

Once `MoveNext` or `MoveMany` returns `false`, all subsequent calls to either of these two methods should return `false`. It is important that they not throw, return `true`, or have any other behavior.

## `GetIdGetter`

This section treats the requirements of a proper `GetIdGetter` implementation.

It is common for objects to serve multiple `ICounted` instances to iterate over what is supposed to be the same data; e.g., in an `IDataView` a cursor set will produce the same data as a serial cursor, just partitioned, and a shuffled cursor will produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID exists for applications that need to reconcile which entry is actually which. Ideally this ID should be unique, but for practical reasons it suffices if collisions are simply extremely improbable.

To be specific, the original case motivating this functionality was SDCA, where it is simultaneously important that we see data in a "random enough" fashion (so, shuffled), and that each instance has an associated dual variable. The ID is used to associate each instance with its corresponding dual variable across multiple iterations over the data. (Note that in this specific application it is sufficient for collisions to be merely improbable, since even a hypothetical collision would probably not materially affect the results anyway, though I'm making that claim without justification.)

Note that this ID, while it must be consistent for multiple streams according to the semantics above, is not considered part of the data per se. So, to take the example of a data view specifically, a single data view must render consistent IDs across all cursorings, but there is no suggestion at all that if the "same" data were presented in a different data view (as by, say, being transformed, cached, saved, or whatever), the IDs between the two different data views would have any discernible relationship.
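As a sketch of the SDCA-style use just described: per-row state can be keyed by ID across multiple cursorings. This assumes the `ICursor`/`GetIdGetter` API discussed in this document, with `data`, `rand`, and `numIterations` as hypothetical names in scope:

```csharp
var duals = new Dictionary<UInt128, float>();
// Multiple passes over the data, possibly shuffled differently each pass.
for (int iter = 0; iter < numIterations; ++iter)
{
    using (IRowCursor cursor = data.GetRowCursor(col => false, rand))
    {
        ValueGetter<UInt128> idGetter = cursor.GetIdGetter();
        UInt128 id = default(UInt128);
        while (cursor.MoveNext())
        {
            idGetter(ref id);
            duals.TryGetValue(id, out float dual);
            // ... compute an updated dual variable for this row ...
            duals[id] = dual;
        }
    }
}
```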
Since this ID is practically often derived from the IDs of some other `ICounted` (e.g., for a transform, the IDs of the output are usually derived from the IDs of the input), it is not only necessary to claim that the IDs generated here are probabilistically unique, but also to describe a procedure, or set of guidelines, that implementors of this method should follow in order to ensure that downstream components have a fair shake at producing unique IDs themselves.

Making duplicate IDs improbable is practically accomplished with a hashing-derived mechanism. For this we have the `UInt128` methods `Fork`, `Next`, and `Combine`. See their documentation for specifics, but they all have in common that they treat the `UInt128` as some sort of intermediate hash state, then return a new hash state based on hashing a block of additional bits. (Since the bits hashed may be fixed, depending on the operation, this can be very efficient.) The basic assumption underlying all of this is that hashes of two different hash states over the same data, or of the same hash state over different data, are unlikely to collide. Note that this is also the reason why `UInt128` was introduced: collisions become likely when the number of elements is on the order of the square root of the hash space. The square root of `UInt64.MaxValue` is only several billion, a totally reasonable number of instances in a dataset, whereas a collision in a 128-bit space is far less likely.

Let's consider the IDs of a collection of entities, then, to ideally form an "acceptable set." An "acceptable set" is one that is not especially or perversely likely to contain collisions, and also one unlikely to give rise to an especially or perversely collision-prone set of IDs, so long as new IDs are derived from it according to the following operations on acceptable sets.

1. The simple enumeration of `UInt128` numeric values from any number is an acceptable set. (This covers how most loaders generate IDs. Typically we start from 0, but other choices, like -1, are acceptable.)

2. The subset of any acceptable set is an acceptable set. (For example, all filter transforms that map any input row to 0 or 1 output rows can just pass through the input cursor's IDs.)

3. Applying `Fork` to every element of an acceptable set exactly once will result in an acceptable set.

4. As a generalization of the above: if, for each element of an acceptable set, you built the set comprised of the single application of `Fork` on that ID followed by the set of any number of applications of `Next`, the union of all such sets would itself be an acceptable set. (This is useful, for example, for operations that produce multiple items per input item. So, if you produced two rows based on every single input row, and the input ID were _id_, then the ID of the first row could be `Fork` of _id_, and the second row could have the ID `Fork` then `Next` of the same _id_.)

5. If you have potentially multiple acceptable sets, while their union obviously might not be acceptable, if you were to form a mapping from each set to a different ID of some other acceptable set (each such ID should be different), and then, for each such set/ID pairing, create the set formed from `Combine` of the items of that set with that ID, then the union of those sets will be acceptable.
(This is useful, for example, if you had something like a join, or a Cartesian product transform, or something like that.)

6. Moreover, similar to the note about the use of `Fork` and `Next`: if, during the creation of one of the sets described above, you were to form for each item of that set a set resulting from multiple applications of `Next`, the union of all those would also be an acceptable set.

This list is not exhaustive. Other operations I have not listed above might result in an acceptable set as well, but one should not attempt other operations without being absolutely certain of what one is doing. The general idea is that the construction of IDs should be structured so that it never arises that the same ID is hashed against the same data in two places that are expected to produce two separate IDs.

Of course, with a malicious actor upstream, collisions are possible and can be engineered quite trivially (e.g., just by returning a constant ID for all rows), but we're not supposing that the input `IDataView` is maliciously engineering hash states, or applying the operations above in some strange way to attempt to induce collisions. E.g., you could take operation 1, define it to be the enumeration of all `UInt128` values, then use operation 2 to select out specifically those hash states that will result in collisions. But I'm supposing this is not happening. If you are running an implementation of a dataview in memory that you suppose is malicious, you probably have bigger problems than someone inducing ID collisions.
\ No newline at end of file
diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
index 3a468ad451..586f6a4b02 100644
--- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
+++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
@@ -49,8 +49,10 @@ public sealed class EntryPointInfo
         public readonly Type OutputType;
         public readonly Type[] InputKinds;
         public readonly Type[] OutputKinds;
+        public readonly ObsoleteAttribute ObsoleteAttribute;

-        internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, TlcModule.EntryPointAttribute attribute)
+        internal EntryPointInfo(IExceptionContext ectx, MethodInfo method,
+            TlcModule.EntryPointAttribute attribute, ObsoleteAttribute obsoleteAttribute)
         {
             Contracts.AssertValueOrNull(ectx);
             ectx.AssertValue(method);
@@ -61,6 +63,7 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, TlcModule.Ent
             Method = method;
             ShortName = attribute.ShortName;
             FriendlyName = attribute.UserName;
+            ObsoleteAttribute = obsoleteAttribute;

             // There are supposed to be 2 parameters, env and input for non-macro nodes.
             // Macro nodes have a 3rd parameter, the entry point node.
@@ -183,7 +186,10 @@ private ModuleCatalog(IExceptionContext ectx)
                 var attr = methodInfo.GetCustomAttributes(typeof(TlcModule.EntryPointAttribute), false).FirstOrDefault() as TlcModule.EntryPointAttribute;
                 if (attr == null)
                     continue;
-                var info = new EntryPointInfo(ectx, methodInfo, attr);
+
+                var info = new EntryPointInfo(ectx, methodInfo, attr,
+                    methodInfo.GetCustomAttributes(typeof(ObsoleteAttribute), false).FirstOrDefault() as ObsoleteAttribute);
+
                 entryPoints.Add(info);
                 if (_entryPointMap.ContainsKey(info.Name))
                 {
diff --git a/src/Microsoft.ML.Core/Utilities/MathUtils.cs b/src/Microsoft.ML.Core/Utilities/MathUtils.cs
index e2848ea25d..8106ff5a2c 100644
--- a/src/Microsoft.ML.Core/Utilities/MathUtils.cs
+++ b/src/Microsoft.ML.Core/Utilities/MathUtils.cs
@@ -871,5 +871,16 @@ public static double Cos(double a)
             var res = Math.Cos(a);
             return Math.Abs(res) > 1 ? double.NaN : res;
         }
+
+        /// <summary>
+        /// Returns the smallest integral value that is greater than or equal to the result of the division.
+        /// </summary>
+        /// <param name="numerator">Number to be divided.</param>
+        /// <param name="denomenator">Number with which to divide the numerator.</param>
+        /// <returns></returns>
+        public static long DivisionCeiling(long numerator, long denomenator)
+        {
+            return (checked(numerator + denomenator) - 1) / denomenator;
+        }
     }
 }
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
index 3867b18f26..3678c749ba 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -85,15 +85,19 @@ private bool TryParse(string str)
                 return TryParseSource(rgstr[istr++]);
             }

-            private bool TryParseSource(string str)
+            private bool TryParseSource(string str) => TryParseSourceEx(str, out Source);
+
+            public static bool TryParseSourceEx(string str, out Range[] ranges)
             {
+                ranges = null;
                 var strs = str.Split(',');
                 if (str.Length == 0)
                     return false;
-                Source = new Range[strs.Length];
+
+                ranges = new Range[strs.Length];
                 for (int i = 0; i < strs.Length; i++)
                 {
-                    if ((Source[i] = Range.Parse(strs[i])) == null)
+                    if ((ranges[i] = Range.Parse(strs[i])) == null)
                         return false;
                 }
                 return true;
@@ -294,9 +298,12 @@ public class ArgumentsCore
                 ShortName = "size")]
             public int? InputSize;

-            [Argument(ArgumentType.AtMostOnce, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")]
+            [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")]
             public string Separator = "tab";

+            [Argument(ArgumentType.AtMostOnce, Name = nameof(Separator), Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator.", ShortName = "sep")]
+            public char[] SeparatorChars = new[] { '\t' };
+
             [Argument(ArgumentType.Multiple, HelpText = "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", ShortName = "col", SortOrder = 1)]
             public Column[] Column;
@@ -1005,26 +1012,40 @@ public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource files
             _inputSize = SrcLim - 1;
             _host.CheckNonEmpty(args.Separator, nameof(args.Separator), "Must specify a separator");

-            string sep = args.Separator.ToLowerInvariant();
-            if (sep == ",")
-                _separators = new char[] { ',' };
-            else
+            // By default args.Separator is tab and args.SeparatorChars is also '\t'.
+            // At any time only one of the two can differ from its default, and
+            // whichever differs is the one that is used.
+            if (args.SeparatorChars.Length > 1 || args.SeparatorChars[0] != '\t')
             {
                 var separators = new HashSet<char>();
-                foreach (string s in sep.Split(','))
-                {
-                    if (string.IsNullOrEmpty(s))
-                        continue;
+                foreach (char c in args.SeparatorChars)
+                    separators.Add(NormalizeSeparator(c.ToString()));

-                    char c = NormalizeSeparator(s);
-                    separators.Add(c);
-                }
                 _separators = separators.ToArray();
-
-                // Handling ",,,," case, that .Split() returns empty strings.
-                if (_separators.Length == 0)
+            }
+            else
+            {
+                string sep = args.Separator.ToLowerInvariant();
+                if (sep == ",")
                     _separators = new char[] { ',' };
+                else
+                {
+                    var separators = new HashSet<char>();
+                    foreach (string s in sep.Split(','))
+                    {
+                        if (string.IsNullOrEmpty(s))
+                            continue;
+
+                        char c = NormalizeSeparator(s);
+                        separators.Add(c);
+                    }
+                    _separators = separators.ToArray();
+
+                    // Handling ",,,," case, that .Split() returns empty strings.
+                    if (_separators.Length == 0)
+                        _separators = new char[] { ',' };
+                }
             }

             _bindings = new Bindings(this, cols, headerFile);
diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs b/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs
index 337aff3c14..3ec23a01bf 100644
--- a/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs
+++ b/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs
@@ -832,21 +832,21 @@ public static class SweepableDiscreteParam
     public static class PipelineSweeperSupportedMetrics
     {
         public new static string ToString() => "SupportedMetric";
-        public const string Auc = "Auc";
+        public const string Auc = "AUC";
         public const string AccuracyMicro = "AccuracyMicro";
         public const string AccuracyMacro = "AccuracyMacro";
         public const string F1 = "F1";
-        public const string AuPrc = "AuPrc";
+        public const string AuPrc = "AUPRC";
         public const string TopKAccuracy = "TopKAccuracy";
         public const string L1 = "L1";
         public const string L2 = "L2";
-        public const string Rms = "Rms";
+        public const string Rms = "RMS";
         public const string LossFn = "LossFn";
         public const string RSquared = "RSquared";
         public const string LogLoss = "LogLoss";
         public const string LogLossReduction = "LogLossReduction";
-        public const string Ndcg = "Ndcg";
-        public const string Dcg = "Dcg";
+        public const string Ndcg = "NDCG";
+        public const string Dcg = "DCG";
         public const string PositivePrecision = "PositivePrecision";
         public const string PositiveRecall = "PositiveRecall";
         public const string NegativePrecision = "NegativePrecision";
@@ -858,9 +858,9 @@ public static class PipelineSweeperSupportedMetrics
         public const string ThreshAtK = "ThreshAtK";
         public const string ThreshAtP = "ThreshAtP";
         public const string ThreshAtNumPos = "ThreshAtNumPos";
-        public const string Nmi = "Nmi";
+        public const string Nmi = "NMI";
         public const string AvgMinScore = "AvgMinScore";
-        public const string Dbi = "Dbi";
+        public const string Dbi = "DBI";
     }
 }
}
diff --git a/src/Microsoft.ML.Data/Transforms/TermTransform.md b/src/Microsoft.ML.Data/Transforms/TermTransform.md
new file mode 100644
index 0000000000..d245fda91b
--- /dev/null
+++ b/src/Microsoft.ML.Data/Transforms/TermTransform.md
@@ -0,0 +1,41 @@
# `TermTransform` Architecture

The term transform takes one or more input columns, and builds a map from observed values into a key type, with various options. This requires first that we build a map given observed data, and then later have a means of applying that map to new data. There are four helper classes of objects to perform this task. We describe them here.
* `Builder` instances can have different behavior depending on the item type of the input, and whether we are sorting the input. Crucially, they work over only primitive types, and are not aware of whether the input data is vector or scalar. As their name implies, they are stateful objects, with mutable state.

* `Trainer` objects wrap a builder, and have different implementations depending on whether their input is vector or scalar. They are also responsible for making sure the number of values accumulated does not exceed the max terms limit. During the term transform's training, these objects are constructed given a row and a particular column, and a method is called on them to process that row.

The above two classes of objects will be created and in existence only when the transform is being trained, that is, in the non-deserializing constructor, and will not be persisted beyond that point.

* `TermMap` objects are created from builder objects, and are the final term map. These are sort of the frozen immutable cousins of builders. Like builders they work over primitive types. These objects are the ones responsible for serialization and deserialization to the model stream and other informational streams, construction of the per-item value mapper delegates, and accessors for the term values used in constructing the metadata (though they do not handle the actual metadata functions themselves). Crucially, these objects can be shared among multiple term transforms or multiple columns, and are not associated themselves with a particular input dataview or column per se.

* `BoundTermMap` objects are bound to a particular dataview, and a particular column. They are responsible for the polymorphism depending on whether the column they're mapping is vector or scalar, the creation of the metadata accessors, and the creation of the actual getters (though, of course, they rely on the term map to do this).
diff --git a/src/Microsoft.ML.Parquet/ParquetLoader.cs b/src/Microsoft.ML.Parquet/ParquetLoader.cs
index f0ecde34dc..21271f6e5c 100644
--- a/src/Microsoft.ML.Parquet/ParquetLoader.cs
+++ b/src/Microsoft.ML.Parquet/ParquetLoader.cs
@@ -94,7 +94,7 @@ public sealed class Arguments
         private readonly int _columnChunkReadSize;
         private readonly Column[] _columnsLoaded;
         private readonly DataSet _schemaDataSet;
-        private const int _defaultColumnChunkReadSize = 100; // Should ideally be close to Rowgroup size
+        private const int _defaultColumnChunkReadSize = 1000000;

         private bool _disposed;
@@ -368,8 +368,8 @@ private sealed class Cursor : RootCursorBase, IRowCursor
             private readonly Delegate[] _getters;
             private readonly ReaderOptions _readerOptions;
             private int _curDataSetRow;
-            private IEnumerator _dataSetEnumerator;
-            private IEnumerator _blockEnumerator;
+            private IEnumerator<int> _dataSetEnumerator;
+            private IEnumerator<int> _blockEnumerator;
             private IList[] _columnValues;
             private IRandom _rand;
@@ -390,11 +390,18 @@ public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
                     Columns = _loader._columnsLoaded.Select(i => i.Name).ToArray()
                 };

-                int numBlocks = (int)Math.Ceiling(((decimal)parent.GetRowCount() / _readerOptions.Count));
-                int[] blockOrder = _rand == null ? Utils.GetIdentityPermutation(numBlocks) : Utils.GetRandomPermutation(rand, numBlocks);
+                // The number of blocks is calculated based on the specified rows in a block (defaults to 1M).
+                // Since we want to shuffle the blocks in addition to shuffling the rows in each block, checks
+                // are put in place to ensure we can produce a shuffle order for the blocks.
+                var numBlocks = MathUtils.DivisionCeiling((long)parent.GetRowCount(), _readerOptions.Count);
+                if (numBlocks > int.MaxValue)
+                {
+                    throw _loader._host.ExceptParam(nameof(Arguments.ColumnChunkReadSize), "Error due to too many blocks. Try increasing block size.");
+                }
+                var blockOrder = CreateOrderSequence((int)numBlocks);
                 _blockEnumerator = blockOrder.GetEnumerator();

-                _dataSetEnumerator = new int[0].GetEnumerator(); // Initialize an empty enumerator to get started
+                _dataSetEnumerator = Enumerable.Empty<int>().GetEnumerator();
                 _columnValues = new IList[_actives.Length];
                 _getters = new Delegate[_actives.Length];
                 for (int i = 0; i < _actives.Length; ++i)
@@ -472,12 +479,12 @@ protected override bool MoveNextCore()
             {
                 if (_dataSetEnumerator.MoveNext())
                 {
-                    _curDataSetRow = (int)_dataSetEnumerator.Current;
+                    _curDataSetRow = _dataSetEnumerator.Current;
                     return true;
                 }
                 else if (_blockEnumerator.MoveNext())
                 {
-                    _readerOptions.Offset = (int)_blockEnumerator.Current * _readerOptions.Count;
+                    _readerOptions.Offset = (long)_blockEnumerator.Current * _readerOptions.Count;

                     // When current dataset runs out, read the next portion of the parquet file.
                     DataSet ds;
                     ds = ParquetReader.Read(_loader._parquetStream, _loader._parquetOptions, _readerOptions);
                     }

-                    int[] dataSetOrder = _rand == null ? Utils.GetIdentityPermutation(ds.RowCount) : Utils.GetRandomPermutation(_rand, ds.RowCount);
+                    var dataSetOrder = CreateOrderSequence(ds.RowCount);
                     _dataSetEnumerator = dataSetOrder.GetEnumerator();
-                    _curDataSetRow = dataSetOrder[0];
+                    _curDataSetRow = dataSetOrder.ElementAt(0);

                     // Cache list for each active column
                     for (int i = 0; i < _actives.Length; i++)
@@ -533,6 +540,26 @@ public bool IsColumnActive(int col)
                 Ch.CheckParam(0 <= col && col < _colToActivesIndex.Length, nameof(col));
                 return _colToActivesIndex[col] >= 0;
             }
+
+            /// <summary>
+            /// Creates an in-order or shuffled sequence, based on whether _rand is specified.
+            /// If unable to create a shuffle sequence, will default to sequential.
+            /// </summary>
+            /// <param name="size">Number of elements in the sequence.</param>
+            /// <returns></returns>
+            private IEnumerable<int> CreateOrderSequence(int size)
+            {
+                IEnumerable<int> order;
+                try
+                {
+                    order = _rand == null ?
+                        Enumerable.Range(0, size) :
+                        Utils.GetRandomPermutation(_rand, size);
+                }
+                catch (OutOfMemoryException)
+                {
+                    order = Enumerable.Range(0, size);
+                }
+                return order;
+            }
         }

         #region Dispose
@@ -671,4 +698,4 @@ private string ConvertListToString(IList list)
         }
     }
 }
-}
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs
index 894029460a..642ff4d0d7 100644
--- a/src/Microsoft.ML.PipelineInference/AutoInference.cs
+++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs
@@ -158,7 +158,8 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok
                 return false;

             string dataVar = firstNodeInputs.Value(nameOfData);
-            ectx.Check(VariableBinding.IsValidVariableName(ectx, dataVar), $"Invalid variable name {dataVar}.");
+            if (!VariableBinding.IsValidVariableName(ectx, dataVar))
+                throw ectx.ExceptParam(nameof(nameOfData), $"Invalid variable name {dataVar}.");

             variableName = dataVar.Substring(1);
             return true;
@@ -172,12 +173,14 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok
         public sealed class RunSummary
         {
             public double MetricValue { get; }
+            public double TrainingMetricValue { get; }
             public int NumRowsInTraining { get; }
             public long RunTimeMilliseconds { get; }

-            public RunSummary(double metricValue, int numRows, long runTimeMilliseconds)
+            public RunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue)
             {
                 MetricValue = metricValue;
+                TrainingMetricValue = trainingMetricValue;
                 NumRowsInTraining = numRows;
                 RunTimeMilliseconds = runTimeMilliseconds;
             }
@@ -303,7 +306,7 @@ private void MainLearningLoop(int batchSize, int numOfTrainingRows)
             var stopwatch = new Stopwatch();
             var probabilityUtils = new Sweeper.Algorithms.SweeperProbabilityUtils(_host);

-            while (!_terminator.ShouldTerminate(_history))
+            while (!_terminator.ShouldTerminate(_history))
             {
                 // Get next set of candidates
                 var currentBatchSize = batchSize;
@@ -341,16 +344,17 @@ private void ProcessPipeline(Sweeper.Algorithms.SweeperProbabilityUtils utils, S
             // Run pipeline, and time how long it takes
             stopwatch.Restart();
-            double d = candidate.RunTrainTestExperiment(_trainData.Take(randomizedNumberOfRows),
-                _testData, Metric, TrainerKind);
+            candidate.RunTrainTestExperiment(_trainData.Take(randomizedNumberOfRows),
+                _testData, Metric, TrainerKind, out var testMetricVal, out var trainMetricVal);
             stopwatch.Stop();

             // Handle key collisions on sorted list
-            while (_sortedSampledElements.ContainsKey(d))
-                d += 1e-10;
+            while (_sortedSampledElements.ContainsKey(testMetricVal))
+                testMetricVal += 1e-10;

             // Save performance score
-            candidate.PerformanceSummary = new RunSummary(d, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds);
+            candidate.PerformanceSummary =
+                new RunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal);
             _sortedSampledElements.Add(candidate.PerformanceSummary.MetricValue, candidate);
             _history.Add(candidate);
         }
@@ -579,11 +583,13 @@ public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimiz
             RecipeInference.InferRecipesFromData(env, trainDataPath, schemaDefinitionFile,
                 out var _, out schemaDefinition, out var _, true);
+#pragma warning disable 0618
             var data = ImportTextData.ImportText(env, new ImportTextData.Input
             {
                 InputFile = new SimpleFileHandle(env, trainDataPath, false, false),
                 CustomSchema = schemaDefinition
             }).Data;
+#pragma warning restore 0618
             var splitOutput =
                 TrainTestSplit.Split(env, new TrainTestSplit.Input { Data = data, Fraction = 0.8f });
             AutoMlMlState amls = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind,
                 splitOutput.TrainData.Take(numOfSampleRows), splitOutput.TestData.Take(numOfSampleRows));
diff --git a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
index bd4de97b48..a0aae16a63 100644
--- a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
+++ b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
@@ -15,21 +15,34 @@ namespace Microsoft.ML.Runtime.PipelineInference
 {
     public static class AutoMlUtils
     {
-        public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView data, string metricColumnName)
+        public static double ExtractValueFromIDV(IHostEnvironment env, IDataView result, string columnName)
         {
-            double metricValue = 0;
-            int numRows = 0;
-            var schema = data.Schema;
-            schema.TryGetColumnIndex(metricColumnName, out var metricCol);
+            Contracts.CheckValue(env, nameof(env));
+            env.CheckValue(result, nameof(result));
+            env.CheckNonEmpty(columnName, nameof(columnName));

-            using (var cursor = data.GetRowCursor(col => col == metricCol))
+            double outputValue = 0;
+            var schema = result.Schema;
+            if (!schema.TryGetColumnIndex(columnName, out var metricCol))
+                throw env.ExceptParam(nameof(columnName), $"Schema does not contain column: {columnName}");
+
+            using (var cursor = result.GetRowCursor(col => col == metricCol))
             {
                 var getter = cursor.GetGetter<double>(metricCol);
-                cursor.MoveNext();
-                getter(ref metricValue);
+                bool moved = cursor.MoveNext();
+                env.Check(moved, "Expected an IDataView with a single row. Results dataset has no rows to extract.");
+                getter(ref outputValue);
+                env.Check(!cursor.MoveNext(), "Expected an IDataView with a single row. Results dataset has too many rows.");
             }
-            return new AutoInference.RunSummary(metricValue, numRows, 0);
+            return outputValue;
+        }
+
+        public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null)
+        {
+            double testingMetricValue = ExtractValueFromIDV(env, result, metricColumnName);
+            double trainingMetricValue = trainResult != null ? ExtractValueFromIDV(env, trainResult, metricColumnName) : double.MinValue;
+            return new AutoInference.RunSummary(testingMetricValue, 0, 0, trainingMetricValue);
         }

         public static CommonInputs.IEvaluatorInput CloneEvaluatorInstance(CommonInputs.IEvaluatorInput evalInput) =>
@@ -618,5 +631,7 @@ public static Tuple[] ConvertToSweepArgumentStrings(TlcModule.
             }
             return results;
         }
+
+        public static string GenerateOverallTrainingMetricVarName(Guid id) => $"Var_Training_OM_{id:N}";
     }
 }
diff --git a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs
index 58f44b9ce8..06c260a054 100644
--- a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs
+++ b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs
@@ -65,11 +65,14 @@ public static Output ExtractSweepResult(IHostEnvironment env, ResultInput input)
             var col1 = new KeyValuePair<string, ColumnType>("Graph", TextType.Instance);
             var col2 = new KeyValuePair<string, ColumnType>("MetricValue", PrimitiveType.FromKind(DataKind.R8));
             var col3 = new KeyValuePair<string, ColumnType>("PipelineId", TextType.Instance);
+            var col4 = new KeyValuePair<string, ColumnType>("TrainingMetricValue", PrimitiveType.FromKind(DataKind.R8));
+            var col5 = new KeyValuePair<string, ColumnType>("FirstInput", TextType.Instance);
+            var col6 = new KeyValuePair<string, ColumnType>("PredictorModel", TextType.Instance);

             if (rows.Count == 0)
             {
                 var host = env.Register("ExtractSweepResult");
-                outputView = new EmptyDataView(host, new SimpleSchema(host, col1, col2, col3));
+                outputView = new EmptyDataView(host, new SimpleSchema(host, col1, col2, col3, col4, col5, col6));
             }
             else
             {
@@ -77,6 +80,9 @@
                 builder.AddColumn(col1.Key, (PrimitiveType)col1.Value, rows.Select(r => new DvText(r.GraphJson)).ToArray());
                 builder.AddColumn(col2.Key, (PrimitiveType)col2.Value, rows.Select(r => r.MetricValue).ToArray());
                 builder.AddColumn(col3.Key, (PrimitiveType)col3.Value, rows.Select(r => new DvText(r.PipelineId)).ToArray());
+                builder.AddColumn(col4.Key, (PrimitiveType)col4.Value, rows.Select(r => r.TrainingMetricValue).ToArray());
+                builder.AddColumn(col5.Key, (PrimitiveType)col5.Value, rows.Select(r => new DvText(r.FirstInput)).ToArray());
+                builder.AddColumn(col6.Key, (PrimitiveType)col6.Value, rows.Select(r => new DvText(r.PredictorModel)).ToArray());
                 outputView = builder.GetDataView();
             }
             return new Output { Results = outputView, State = autoMlState };
@@ -132,11 +138,11 @@ public static CommonOutputs.MacroOutput<Output> PipelineSweep(
             // Extract performance summaries and assign to previous candidate pipelines.
             foreach (var pipeline in autoMlState.BatchCandidates)
             {
-                if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId),
-                    out var v))
+                if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v) &&
+                    node.Context.TryGetVariable(AutoMlUtils.GenerateOverallTrainingMetricVarName(pipeline.UniqueId), out var v2))
                 {
                     pipeline.PerformanceSummary =
-                        AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name);
+                        AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value);
                     autoMlState.AddEvaluated(pipeline);
                 }
             }
@@ -168,14 +174,17 @@ public static CommonOutputs.MacroOutput<Output> PipelineSweep(
             {
                 // Add train test experiments to current graph for candidate pipeline
                 var subgraph = new Experiment(env);
-                var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph);
+                var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph, true);

                 // Change variable name to reference pipeline ID in output map, context and entrypoint output.
                 var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
+                var uniqueNameTraining = AutoMlUtils.GenerateOverallTrainingMetricVarName(p.UniqueId);
                 var sgNode = EntryPointNode.ValidateNodes(env, node.Context,
                     new JArray(subgraph.GetNodes().Last()), node.Catalog).Last();
                 sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
+                sgNode.RenameOutputVariable(trainTestOutput.TrainingOverallMetrics.VarName, uniqueNameTraining, cascadeChanges: true);
                 trainTestOutput.OverallMetrics.VarName = uniqueName;
+                trainTestOutput.TrainingOverallMetrics.VarName = uniqueNameTraining;
                 expNodes.Add(sgNode);

                 // Store indicators, to pass to next iteration of macro.
diff --git a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj
index 7cf9585f3b..ab3e464c74 100644
--- a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj
+++ b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj
@@ -17,6 +17,7 @@
+
diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs
index 21287742a0..662a16798f 100644
--- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs
+++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs
@@ -17,20 +17,38 @@ namespace Microsoft.ML.Runtime.PipelineInference
     /// </summary>
     public sealed class PipelinePattern : IEquatable<PipelinePattern>
     {
+        /// <summary>
+        /// Class for encapsulating the information returned in the output IDataView for a pipeline
+        /// that has been run through the TrainTest macro.
+        /// </summary>
         public sealed class PipelineResultRow
         {
             public string GraphJson { get; }
+            /// <summary>
+            /// The metric value of the test dataset result (always needed).
+            /// </summary>
             public double MetricValue { get; }
+            /// <summary>
+            /// The metric value of the training dataset result (not always used or set).
+            /// </summary>
+            public double TrainingMetricValue { get; }
             public string PipelineId { get; }
+            public string FirstInput { get; }
+            public string PredictorModel { get; }

             public PipelineResultRow() { }

-            public PipelineResultRow(string graphJson, double metricValue, string pipelineId)
+            public PipelineResultRow(string graphJson, double metricValue,
+                string pipelineId, double trainingMetricValue, string firstInput,
+                string predictorModel)
             {
                 GraphJson = graphJson;
                 MetricValue = metricValue;
                 PipelineId = pipelineId;
+                TrainingMetricValue = trainingMetricValue;
+                FirstInput = firstInput;
+                PredictorModel = predictorModel;
             }
         }
@@ -111,7 +129,8 @@ public AutoInference.EntryPointGraphDef ToEntryPointGraph(Experiment experiment
         public bool Equals(PipelinePattern obj) => obj != null && UniqueId == obj.UniqueId;

         // REVIEW: We may want to allow for sweeping with CV in the future, so we will need to add new methods like this, or refactor these in that case.
- public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testData, MacroUtils.TrainerKinds trainerKind, out Models.TrainTestEvaluator.Output resultsOutput) + public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testData, MacroUtils.TrainerKinds trainerKind, + bool includeTrainingMetrics, out Models.TrainTestEvaluator.Output resultsOutput) { var graphDef = ToEntryPointGraph(); var subGraph = graphDef.Graph; @@ -136,7 +155,8 @@ public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testD Model = finalOutput }, PipelineId = UniqueId.ToString("N"), - Kind = MacroUtils.TrainerKindApiValue(trainerKind) + Kind = MacroUtils.TrainerKindApiValue(trainerKind), + IncludeTrainingMetrics = includeTrainingMetrics }; var experiment = _env.CreateExperiment(); @@ -150,7 +170,7 @@ public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testD } public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, Var testData, - MacroUtils.TrainerKinds trainerKind, Experiment experiment = null) + MacroUtils.TrainerKinds trainerKind, Experiment experiment = null, bool includeTrainingMetrics = false) { experiment = experiment ?? _env.CreateExperiment(); var graphDef = ToEntryPointGraph(experiment); @@ -174,7 +194,8 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, TrainingData = trainData, TestingData = testData, Kind = MacroUtils.TrainerKindApiValue(trainerKind), - PipelineId = UniqueId.ToString("N") + PipelineId = UniqueId.ToString("N"), + IncludeTrainingMetrics = includeTrainingMetrics }; var trainTestOutput = experiment.Add(trainTestInput); return trainTestOutput; @@ -183,57 +204,80 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, /// /// Runs a train-test experiment on the current pipeline, through entrypoints. 
/// - public double RunTrainTestExperiment(IDataView trainData, IDataView testData, AutoInference.SupportedMetric metric, MacroUtils.TrainerKinds trainerKind) + public void RunTrainTestExperiment(IDataView trainData, IDataView testData, + AutoInference.SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue, + out double trainMetricValue) { - var experiment = CreateTrainTestExperiment(trainData, testData, trainerKind, out var trainTestOutput); + var experiment = CreateTrainTestExperiment(trainData, testData, trainerKind, true, out var trainTestOutput); experiment.Run(); - var dataOut = experiment.GetOutput(trainTestOutput.OverallMetrics); - var schema = dataOut.Schema; - schema.TryGetColumnIndex(metric.Name, out var metricCol); - using (var cursor = dataOut.GetRowCursor(col => col == metricCol)) - { - var getter = cursor.GetGetter(metricCol); - double metricValue = 0; - cursor.MoveNext(); - getter(ref metricValue); - return metricValue; - } + var dataOut = experiment.GetOutput(trainTestOutput.OverallMetrics); + var dataOutTraining = experiment.GetOutput(trainTestOutput.TrainingOverallMetrics); + testMetricValue = AutoMlUtils.ExtractValueFromIDV(_env, dataOut, metric.Name); + trainMetricValue = AutoMlUtils.ExtractValueFromIDV(_env, dataOutTraining, metric.Name); } - public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data, string graphColName, string metricColName, string idColName) + public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView data, + string graphColName, string metricColName, string idColName, string trainingMetricColName, + string firstInputColName, string predictorModelColName) { var results = new List(); var schema = data.Schema; if (!schema.TryGetColumnIndex(graphColName, out var graphCol)) - throw env.ExceptNotSupp($"Column name {graphColName} not found"); + throw env.ExceptParam(nameof(graphColName), $"Column name {graphColName} not found"); if (!schema.TryGetColumnIndex(metricColName, out var metricCol)) - throw env.ExceptNotSupp($"Column name {metricColName} not found"); + throw env.ExceptParam(nameof(metricColName), $"Column name {metricColName} not found"); + if (!schema.TryGetColumnIndex(trainingMetricColName, out var trainingMetricCol)) + throw env.ExceptParam(nameof(trainingMetricColName), $"Column name {trainingMetricColName} not found"); if (!schema.TryGetColumnIndex(idColName, out var pipelineIdCol)) - throw env.ExceptNotSupp($"Column name {idColName} not found"); + throw env.ExceptParam(nameof(idColName), $"Column name {idColName} not found"); + if (!schema.TryGetColumnIndex(firstInputColName, out var firstInputCol)) + throw env.ExceptParam(nameof(firstInputColName), $"Column name {firstInputColName} not found"); + if (!schema.TryGetColumnIndex(predictorModelColName, out var predictorModelCol)) + throw env.ExceptParam(nameof(predictorModelColName), $"Column name {predictorModelColName} not found"); using (var cursor = data.GetRowCursor(col => true)) { + var getter1 = cursor.GetGetter(metricCol); + var getter2 = cursor.GetGetter(graphCol); + var getter3 = cursor.GetGetter(pipelineIdCol); + var getter4 = cursor.GetGetter(trainingMetricCol); + var getter5 = cursor.GetGetter(firstInputCol); + var getter6 = cursor.GetGetter(predictorModelCol); + double metricValue = 0; + double trainingMetricValue = 0; + DvText graphJson = new DvText(); + DvText pipelineId = new DvText(); + DvText firstInput = new DvText(); + DvText predictorModel = new DvText(); + while (cursor.MoveNext()) { - 
var getter1 = cursor.GetGetter(metricCol); - double metricValue = 0; getter1(ref metricValue); - var getter2 = cursor.GetGetter(graphCol); - DvText graphJson = new DvText(); getter2(ref graphJson); - var getter3 = cursor.GetGetter(pipelineIdCol); - DvText pipelineId = new DvText(); getter3(ref pipelineId); - results.Add(new PipelineResultRow(graphJson.ToString(), metricValue, pipelineId.ToString())); + getter4(ref trainingMetricValue); + getter5(ref firstInput); + getter6(ref predictorModel); + + results.Add(new PipelineResultRow(graphJson.ToString(), + metricValue, pipelineId.ToString(), trainingMetricValue, + firstInput.ToString(), predictorModel.ToString())); } } return results.ToArray(); } - public PipelineResultRow ToResultRow() => - new PipelineResultRow(ToEntryPointGraph().Graph.ToJsonString(), - PerformanceSummary?.MetricValue ?? -1d, UniqueId.ToString("N")); + public PipelineResultRow ToResultRow() + { + var graphDef = ToEntryPointGraph(); + + return new PipelineResultRow($"{{'Nodes' : [{graphDef.Graph.ToJsonString()}]}}", + PerformanceSummary?.MetricValue ?? -1d, UniqueId.ToString("N"), + PerformanceSummary?.TrainingMetricValue ?? -1d, + graphDef.GetSubgraphFirstNodeDataVarName(_env), + graphDef.ModelOutput.VarName); + } } } diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 2ca1af7159..317ee98db0 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -22,6 +22,30 @@ namespace Runtime { public sealed partial class Experiment { + public Microsoft.ML.Data.CustomTextLoader.Output Add(Microsoft.ML.Data.CustomTextLoader input) + { + var output = new Microsoft.ML.Data.CustomTextLoader.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Data.CustomTextLoader input, Microsoft.ML.Data.CustomTextLoader.Output output) + { + _jsonNodes.Add(Serialize("Data.CustomTextLoader", input, output)); + } + + public Microsoft.ML.Data.DataViewReference.Output Add(Microsoft.ML.Data.DataViewReference input) + { + var output = new Microsoft.ML.Data.DataViewReference.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Data.DataViewReference input, Microsoft.ML.Data.DataViewReference.Output output) + { + _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); + } + public Microsoft.ML.Data.IDataViewArrayConverter.Output Add(Microsoft.ML.Data.IDataViewArrayConverter input) { var output = new Microsoft.ML.Data.IDataViewArrayConverter.Output(); @@ -53,22 +77,11 @@ public Microsoft.ML.Data.TextLoader.Output Add(Microsoft.ML.Data.TextLoader inpu return output; } - public Microsoft.ML.Data.DataViewReference.Output Add(Microsoft.ML.Data.DataViewReference input) - { - var output = new Microsoft.ML.Data.DataViewReference.Output(); - Add(input, output); - return output; - } - public void Add(Microsoft.ML.Data.TextLoader input, Microsoft.ML.Data.TextLoader.Output output) { _jsonNodes.Add(Serialize("Data.TextLoader", input, output)); } - public void Add(Microsoft.ML.Data.DataViewReference input, Microsoft.ML.Data.DataViewReference.Output output) - { - _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); - } public Microsoft.ML.Models.AnomalyDetectionEvaluator.Output Add(Microsoft.ML.Models.AnomalyDetectionEvaluator input) { var output = new Microsoft.ML.Models.AnomalyDetectionEvaluator.Output(); @@ -453,6 +466,18 @@ public void Add(Microsoft.ML.Trainers.GeneralizedAdditiveModelRegressor input, M _jsonNodes.Add(Serialize("Trainers.GeneralizedAdditiveModelRegressor", 
input, output)); } + public Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output Add(Microsoft.ML.Trainers.KMeansPlusPlusClusterer input) + { + var output = new Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Trainers.KMeansPlusPlusClusterer input, Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output output) + { + _jsonNodes.Add(Serialize("Trainers.KMeansPlusPlusClusterer", input, output)); + } + public Microsoft.ML.Trainers.LinearSvmBinaryClassifier.Output Add(Microsoft.ML.Trainers.LinearSvmBinaryClassifier input) { var output = new Microsoft.ML.Trainers.LinearSvmBinaryClassifier.Output(); @@ -1271,6 +1296,66 @@ public void Add(Microsoft.ML.Transforms.WordTokenizer input, Microsoft.ML.Transf } } + namespace Data + { + + /// + /// Import a dataset from a text file + /// + [Obsolete("Use TextLoader instead.")] + public sealed partial class CustomTextLoader + { + + + /// + /// Location of the input file + /// + public Var InputFile { get; set; } = new Var(); + + /// + /// Custom schema to use for parsing + /// + public string CustomSchema { get; set; } + + + public sealed class Output + { + /// + /// The resulting data view + /// + public Var Data { get; set; } = new Var(); + + } + } + } + + namespace Data + { + + /// + /// Pass dataview from memory to experiment + /// + public sealed partial class DataViewReference + { + + + /// + /// Pointer to IDataView in memory + /// + public Var Data { get; set; } = new Var(); + + + public sealed class Output + { + /// + /// The resulting data view + /// + public Var Data { get; set; } = new Var(); + + } + } + } + namespace Data { @@ -1328,40 +1413,185 @@ public sealed class Output namespace Data { - /// - /// Import a dataset from a text file - /// - public sealed partial class TextLoader + public sealed partial class TextLoaderArguments { + /// + /// Use separate parsing threads? + /// + public bool UseThreads { get; set; } = true; + /// + /// File containing a header with feature names. If specified, header defined in the data file (header+) is ignored. + /// + public string HeaderFile { get; set; } /// - /// Location of the input file + /// Maximum number of rows to produce /// - public Var InputFile { get; set; } = new Var(); + public long? MaxRows { get; set; } /// - /// Custom schema to use for parsing + /// Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by "". When false, consecutive separators denote an empty value. /// - public string CustomSchema { get; set; } + public bool AllowQuoting { get; set; } = true; + + /// + /// Whether the input may include sparse representations + /// + public bool AllowSparse { get; set; } = true; + /// + /// Number of source columns in the text data. Default is that sparse rows contain their size information. + /// + public int? InputSize { get; set; } - public sealed class Output - { - /// - /// The resulting data view - /// - public Var Data { get; set; } = new Var(); + /// + /// Source column separator. + /// + public char[] Separator { get; set; } = { '\t' }; + + /// + /// Column groups. 
Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 + /// + public TextLoaderColumn[] Column { get; set; } + + /// + /// Remove trailing whitespace from lines + /// + public bool TrimWhitespace { get; set; } = false; + + /// + /// Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified. + /// + public bool HasHeader { get; set; } = false; - } } - public sealed partial class DataViewReference + public sealed partial class TextLoaderColumn { + /// + /// Name of the column + /// + public string Name { get; set; } + + /// + /// Type of the items in the column + /// + public DataKind? Type { get; set; } + + /// + /// Source index range(s) of the column + /// + public TextLoaderRange[] Source { get; set; } + + /// + /// For a key column, this defines the range of values + /// + public KeyRange KeyRange { get; set; } + + } + + public sealed partial class TextLoaderRange + { + /// + /// First index in the range + /// + public int Min { get; set; } + + /// + /// Last index in the range + /// + public int? Max { get; set; } + + /// + /// This range extends to the end of the line, but should be a fixed number of items + /// + public bool AutoEnd { get; set; } = false; + + /// + /// This range extends to the end of the line, which can vary from line to line + /// + public bool VariableEnd { get; set; } = false; + + /// + /// This range includes only other indices not specified + /// + public bool AllOther { get; set; } = false; + + /// + /// Force scalar columns to be treated as vectors of length one + /// + public bool ForceVector { get; set; } = false; + + } + + public sealed partial class KeyRange + { + /// + /// First index in the range + /// + public ulong Min { get; set; } = 0; + + /// + /// Last index in the range + /// + public ulong? 
Max { get; set; } + + /// + /// Whether the key is contiguous + /// + public bool Contiguous { get; set; } = true; + + } + + /// + /// Import a dataset from a text file + /// + public sealed partial class TextLoader : Microsoft.ML.ILearningPipelineLoader + { + + [JsonIgnore] + private string _inputFilePath = null; + public TextLoader(string filePath) + { + _inputFilePath = filePath; + } + + public void SetInput(IHostEnvironment env, Experiment experiment) + { + IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false); + experiment.SetInput(InputFile, inputFile); + } + + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + Contracts.Assert(previousStep == null); + + return new TextLoaderPipelineStep(experiment.Add(this)); + } + + private class TextLoaderPipelineStep : ILearningPipelineDataStep + { + public TextLoaderPipelineStep (Output output) + { + Data = output.Data; + Model = null; + } + + public Var Data { get; } + public Var Model { get; } + } + /// /// Location of the input file /// - public Var Data { get; set; } = new Var(); + public Var InputFile { get; set; } = new Var(); + + /// + /// Arguments + /// + public Data.TextLoaderArguments Arguments { get; set; } = new Data.TextLoaderArguments(); + public sealed class Output { @@ -1561,7 +1791,7 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICla namespace Models { - public sealed class CrossValidationBinaryMacroSubGraphInput + public sealed partial class CrossValidationBinaryMacroSubGraphInput { /// /// The data to be used for training @@ -1570,7 +1800,7 @@ public sealed class CrossValidationBinaryMacroSubGraphInput } - public sealed class CrossValidationBinaryMacroSubGraphOutput + public sealed partial class CrossValidationBinaryMacroSubGraphOutput { /// /// The model @@ -1826,7 +2056,7 @@ public enum MacroUtilsTrainerKinds } - public sealed class CrossValidationMacroSubGraphInput + public sealed partial class CrossValidationMacroSubGraphInput { /// /// The data to be used for training @@ -1835,7 +2065,7 @@ public sealed class CrossValidationMacroSubGraphInput } - public sealed class CrossValidationMacroSubGraphOutput + public sealed partial class CrossValidationMacroSubGraphOutput { /// /// The model @@ -2239,7 +2469,7 @@ public enum CachingOptions } - public sealed class OneVersusAllMacroSubGraphOutput + public sealed partial class OneVersusAllMacroSubGraphOutput { /// /// The predictor model for the subgraph exemplar. 
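A minimal sketch of how the regenerated TextLoader surface above is meant to be driven purely through Arguments, mirroring the test updates later in this diff; the file name and column layout are illustrative assumptions, not part of the change:

    var loader = new Microsoft.ML.Data.TextLoader("breast-cancer.txt");   // illustrative path
    loader.Arguments.HasHeader = false;
    loader.Arguments.Column = new[]
    {
        // Scalar column read from a single source index (TextLoaderRange(int)).
        new Microsoft.ML.Data.TextLoaderColumn
        {
            Name = "Label",
            Source = new[] { new Microsoft.ML.Data.TextLoaderRange(0) }
        },
        // Vector column spanning a contiguous range of source indices (TextLoaderRange(int, int)).
        new Microsoft.ML.Data.TextLoaderColumn
        {
            Name = "Features",
            Source = new[] { new Microsoft.ML.Data.TextLoaderRange(1, 9) }
        }
    };
    var pipeline = new Microsoft.ML.LearningPipeline();
    pipeline.Add(loader);   // SetInput/ApplyStep above wire the file handle into the experiment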
@@ -2877,7 +3107,7 @@ public sealed class Output namespace Models { - public sealed class TrainTestBinaryMacroSubGraphInput + public sealed partial class TrainTestBinaryMacroSubGraphInput { /// /// The data to be used for training @@ -2886,7 +3116,7 @@ public sealed class TrainTestBinaryMacroSubGraphInput } - public sealed class TrainTestBinaryMacroSubGraphOutput + public sealed partial class TrainTestBinaryMacroSubGraphOutput { /// /// The model @@ -2962,7 +3192,7 @@ public sealed class Output namespace Models { - public sealed class TrainTestMacroSubGraphInput + public sealed partial class TrainTestMacroSubGraphInput { /// /// The data to be used for training @@ -2971,7 +3201,7 @@ public sealed class TrainTestMacroSubGraphInput } - public sealed class TrainTestMacroSubGraphOutput + public sealed partial class TrainTestMacroSubGraphOutput { /// /// The model @@ -5686,6 +5916,107 @@ public GeneralizedAdditiveModelRegressorPipelineStep(Output output) } } + namespace Trainers + { + public enum KMeansPlusPlusTrainerInitAlgorithm + { + KMeansPlusPlus = 0, + Random = 1, + KMeansParallel = 2 + } + + + /// + /// K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better method for choosing the initial cluster centers. + /// + public sealed partial class KMeansPlusPlusClusterer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem + { + + + /// + /// The number of clusters + /// + [TlcModule.SweepableDiscreteParamAttribute("K", new object[]{5, 10, 20, 40})] + public int K { get; set; } = 5; + + /// + /// Cluster initialization algorithm + /// + public Trainers.KMeansPlusPlusTrainerInitAlgorithm InitAlgorithm { get; set; } = Trainers.KMeansPlusPlusTrainerInitAlgorithm.KMeansParallel; + + /// + /// Tolerance parameter for trainer convergence. Lower = slower, more accurate + /// + public float OptTol { get; set; } = 1E-07f; + + /// + /// Maximum number of iterations. + /// + public int MaxIterations { get; set; } = 1000; + + /// + /// Memory budget (in MBs) to use for KMeans acceleration + /// + public int AccelMemBudgetMb { get; set; } = 4096; + + /// + /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. + /// + public int? 
NumThreads { get; set; } + + /// + /// The data to be used for training + /// + public Var TrainingData { get; set; } = new Var(); + + /// + /// Column to use for features + /// + public string FeatureColumn { get; set; } = "Features"; + + /// + /// Normalize option for the feature column + /// + public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + + /// + /// Whether learner should cache input training data + /// + public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + + + public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IClusteringOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput + { + /// + /// The trained model + /// + public Var PredictorModel { get; set; } = new Var(); + + } + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(KMeansPlusPlusClusterer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } + + TrainingData = dataStep.Data; + Output output = experiment.Add(this); + return new KMeansPlusPlusClustererPipelineStep(output); + } + + private class KMeansPlusPlusClustererPipelineStep : ILearningPipelinePredictorStep + { + public KMeansPlusPlusClustererPipelineStep(Output output) + { + Model = output.PredictorModel; + } + + public Var Model { get; } + } + } + } + namespace Trainers { @@ -7196,7 +7527,7 @@ public BinaryPredictionScoreColumnsRenamerPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformBinColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformBinColumn : OneToOneColumn, IOneToOneColumn { /// /// Max number of bins, power of 2 recommended @@ -7348,7 +7679,7 @@ public enum CategoricalTransformOutputKind : byte } - public sealed class CategoricalHashTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CategoricalHashTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The number of bits to hash into. Must be between 1 and 30, inclusive. 
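A hedged sketch of how the new clustering trainer composes with the LearningPipeline pattern used by the benchmark changes later in this diff; the input class, column ordinals, and file name are illustrative assumptions:

    using Microsoft.ML;
    using Microsoft.ML.Data;
    using Microsoft.ML.Runtime.Api;
    using Microsoft.ML.Trainers;
    using Microsoft.ML.Transforms;

    public class PointData
    {
        [Column("0")] public float X;   // ordinals refer to columns in the text file
        [Column("1")] public float Y;
    }

    // ...
    var pipeline = new LearningPipeline();
    pipeline.Add(new TextLoader("points.txt").CreateFrom<PointData>(useHeader: false));
    pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "X", "Y"));
    pipeline.Add(new KMeansPlusPlusClusterer { K = 10, MaxIterations = 500 });   // defaults: KMeansParallel init, OptTol = 1e-7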
@@ -7518,7 +7849,7 @@ public enum TermTransformSortOrder : byte } - public sealed class CategoricalTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CategoricalTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector @@ -7682,7 +8013,7 @@ public CategoricalOneHotVectorizerPipelineStep(Output output) namespace Transforms { - public sealed class CharTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CharTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -7801,7 +8132,7 @@ public CharacterTokenizerPipelineStep(Output output) namespace Transforms { - public sealed class ConcatTransformColumn : ManyToOneColumn, IManyToOneColumn + public sealed partial class ConcatTransformColumn : ManyToOneColumn, IManyToOneColumn { /// /// Name of the new column @@ -7891,7 +8222,7 @@ public ColumnConcatenatorPipelineStep(Output output) namespace Transforms { - public sealed class CopyColumnsTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CopyColumnsTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -8153,7 +8484,7 @@ public enum DataKind : byte } - public sealed class ConvertTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class ConvertTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The result type @@ -8352,7 +8683,7 @@ public CombinerByContiguousGroupIdPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformAffineColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformAffineColumn : OneToOneColumn, IOneToOneColumn { /// /// Whether to map zero to zero, preserving sparsity @@ -8625,7 +8956,7 @@ public sealed class Output namespace Transforms { - public sealed class TermTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class TermTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Maximum number of terms to keep when auto-training @@ -8979,7 +9310,7 @@ public FeatureSelectorByMutualInformationPipelineStep(Output output) namespace Transforms { - public sealed class LpNormNormalizerTransformGcnColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LpNormNormalizerTransformGcnColumn : OneToOneColumn, IOneToOneColumn { /// /// Normalize by standard deviation rather than L2 norm @@ -9123,7 +9454,7 @@ public GlobalContrastNormalizerPipelineStep(Output output) namespace Transforms { - public sealed class HashJoinTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class HashJoinTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Whether the values need to be combined for a single hash @@ -9282,7 +9613,7 @@ public HashConverterPipelineStep(Output output) namespace Transforms { - public sealed class KeyToValueTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class KeyToValueTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -9461,7 +9792,7 @@ public LabelColumnKeyBooleanConverterPipelineStep(Output output) namespace Transforms { - public sealed class LabelIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LabelIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The positive example class for binary classification. 
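Design note on the sealed → sealed partial churn in the hunks above and below: making the generated column/argument types partial allows hand-written convenience members, such as the TextLoaderRange constructors added in src/Microsoft.ML/Data/TextLoader.cs later in this diff, to live alongside the generator's output without hand-editing generated code.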
@@ -9645,7 +9976,7 @@ public LabelToFloatConverterPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformLogNormalColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformLogNormalColumn : OneToOneColumn, IOneToOneColumn { /// /// Max number of examples used to train the normalizer @@ -9782,7 +10113,7 @@ public enum LpNormNormalizerTransformNormalizerKind : byte } - public sealed class LpNormNormalizerTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LpNormNormalizerTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The norm to use to normalize each sample @@ -10185,7 +10516,7 @@ public enum NAHandleTransformReplacementKind } - public sealed class NAHandleTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAHandleTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The replacement method to utilize @@ -10329,7 +10660,7 @@ public MissingValueHandlerPipelineStep(Output output) namespace Transforms { - public sealed class NAIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -10443,7 +10774,7 @@ public MissingValueIndicatorPipelineStep(Output output) namespace Transforms { - public sealed class NADropTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NADropTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -10637,7 +10968,7 @@ public enum NAReplaceTransformReplacementKind } - public sealed class NAReplaceTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAReplaceTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Replacement value for NAs (uses default value if not given) @@ -10810,7 +11141,7 @@ public enum NgramTransformWeightingCriteria } - public sealed class NgramTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NgramTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Maximum ngram length @@ -11149,7 +11480,7 @@ public PredictedLabelColumnOriginalValueConverterPipelineStep(Output output) namespace Transforms { - public sealed class GenerateNumberTransformColumn + public sealed partial class GenerateNumberTransformColumn { /// /// Name of the new column @@ -11888,7 +12219,7 @@ public enum TextTransformTextNormKind } - public sealed class TextTransformColumn : ManyToOneColumn, IManyToOneColumn + public sealed partial class TextTransformColumn : ManyToOneColumn, IManyToOneColumn { /// /// Name of the new column @@ -11902,7 +12233,7 @@ public sealed class TextTransformColumn : ManyToOneColumn, } - public sealed class TermLoaderArguments + public sealed partial class TermLoaderArguments { /// /// List of terms @@ -12317,7 +12648,7 @@ public sealed class Output namespace Transforms { - public sealed class DelimitedTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class DelimitedTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character. diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs new file mode 100644 index 0000000000..3c8550ef09 --- /dev/null +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -0,0 +1,179 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Api;
+using Microsoft.ML.Runtime.Data;
+using System;
+using System.Linq;
+using System.Reflection;
+using System.Text.RegularExpressions;
+
+namespace Microsoft.ML.Data
+{
+    public sealed partial class TextLoaderRange
+    {
+        public TextLoaderRange()
+        {
+        }
+
+        ///
+        /// Convenience constructor for the scalar case, when a given column
+        /// in the schema spans only a single column in the dataset.
+        /// Min and Max are both set to the single value ordinal.
+        ///
+        /// Column index in the dataset.
+        public TextLoaderRange(int ordinal)
+        {
+            Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Cannot be a negative number.");
+
+            Min = ordinal;
+            Max = ordinal;
+        }
+
+        ///
+        /// Convenience constructor for the vector case, when a given column
+        /// in the schema spans contiguous columns in the dataset.
+        ///
+        /// Starting column index in the dataset.
+        /// Ending column index in the dataset.
+        public TextLoaderRange(int min, int max)
+        {
+            Contracts.CheckParam(min >= 0, nameof(min), "Cannot be a negative number.");
+            Contracts.CheckParam(max >= min, nameof(max), "Cannot be less than " + nameof(min) + ".");
+
+            Min = min;
+            Max = max;
+        }
+    }
+
+    public sealed partial class TextLoader
+    {
+        ///
+        /// Construct a TextLoader object by inferring the dataset schema from a type.
+        ///
+        /// Does the file contain a header?
+        /// Column separator character. Default is '\t'.
+        /// Whether the input may include quoted values,
+        /// which can contain separator characters, colons,
+        /// and distinguish empty values from missing values. When true, consecutive separators
+        /// denote a missing value and an empty value is denoted by \"\".
+        /// When false, consecutive separators denote an empty value.
+        /// Whether the input may include sparse representations, e.g.
+        /// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
+        /// except for the 3rd and 5th, which have the values 6 and 3.
+        /// Remove trailing whitespace from lines.
+        public TextLoader CreateFrom<TInput>(bool useHeader = false,
+            char separator = '\t', bool allowQuotedStrings = true,
+            bool supportSparse = true, bool trimWhitespace = false)
+        {
+            var fields = typeof(TInput).GetFields();
+            Arguments.Column = new TextLoaderColumn[fields.Length];
+            for (int index = 0; index < fields.Length; index++)
+            {
+                var field = fields[index];
+                var mappingAttr = field.GetCustomAttribute<ColumnAttribute>();
+                if (mappingAttr == null)
+                    throw Contracts.Except($"{field.Name} is missing ColumnAttribute");
+
+                if (Regex.Match(mappingAttr.Ordinal, @"[^(0-9,\*\-~)]+").Success)
+                    throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " +
+                        $"Valid characters are 0-9, *, - and ~");
+
+                var name = mappingAttr.Name ?? field.Name;
+                if (name.Any(c => !Char.IsLetterOrDigit(c)))
+                    throw Contracts.Except($"{name} is not alphanumeric.");
+
+                Runtime.Data.TextLoader.Range[] sources;
+                if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources))
+                    throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed.");
+
+                Contracts.Assert(sources != null);
+
+                TextLoaderColumn tlc = new TextLoaderColumn();
+                tlc.Name = name;
+                tlc.Source = new TextLoaderRange[sources.Length];
+                DataKind dk;
+                if (!TryGetDataKind(field.FieldType.IsArray ?
field.FieldType.GetElementType() : field.FieldType, out dk)) + throw Contracts.Except($"{name} is of unsupported type."); + + tlc.Type = dk; + + for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++) + { + tlc.Source[indexLocal] = new TextLoaderRange + { + AllOther = sources[indexLocal].AllOther, + AutoEnd = sources[indexLocal].AutoEnd, + ForceVector = sources[indexLocal].ForceVector, + VariableEnd = sources[indexLocal].VariableEnd, + Max = sources[indexLocal].Max, + Min = sources[indexLocal].Min + }; + } + + Arguments.Column[index] = tlc; + } + + Arguments.HasHeader = useHeader; + Arguments.Separator = new[] { separator }; + Arguments.AllowQuoting = allowQuotedStrings; + Arguments.AllowSparse = supportSparse; + Arguments.TrimWhitespace = trimWhitespace; + + return this; + } + + /// + /// Try to map a System.Type to a corresponding DataKind value. + /// + private static bool TryGetDataKind(Type type, out DataKind kind) + { + Contracts.AssertValue(type); + + // REVIEW: Make this more efficient. Should we have a global dictionary? + if (type == typeof(DvInt1) || type == typeof(sbyte)) + kind = DataKind.I1; + else if (type == typeof(byte) || type == typeof(char)) + kind = DataKind.U1; + else if (type == typeof(DvInt2) || type == typeof(short)) + kind = DataKind.I2; + else if (type == typeof(ushort)) + kind = DataKind.U2; + else if (type == typeof(DvInt4) || type == typeof(int)) + kind = DataKind.I4; + else if (type == typeof(uint)) + kind = DataKind.U4; + else if (type == typeof(DvInt8) || type == typeof(long)) + kind = DataKind.I8; + else if (type == typeof(ulong)) + kind = DataKind.U8; + else if (type == typeof(Single)) + kind = DataKind.R4; + else if (type == typeof(Double)) + kind = DataKind.R8; + else if (type == typeof(DvText) || type == typeof(string)) + kind = DataKind.TX; + else if (type == typeof(DvBool) || type == typeof(bool)) + kind = DataKind.BL; + else if (type == typeof(DvTimeSpan) || type == typeof(TimeSpan)) + kind = DataKind.TS; + else if (type == typeof(DvDateTime) || type == typeof(DateTime)) + kind = DataKind.DT; + else if (type == typeof(DvDateTimeZone) || type == typeof(TimeZoneInfo)) + kind = DataKind.DZ; + else if (type == typeof(UInt128)) + kind = DataKind.UG; + else + { + kind = default(DataKind); + return false; + } + + return true; + } + } +} diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 51677afbf4..0e554734ea 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -68,7 +68,7 @@ public LearningPipeline() /// Possible data loader(s), transforms and trainers options are /// /// Data Loader: - /// + /// /// etc. 
/// /// @@ -154,7 +154,6 @@ public PredictionModel Train() step = currentItem.ApplyStep(step, experiment); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) transformModels.Add(dataStep.Model); - else if (step is ILearningPipelinePredictorStep predictorDataStep) { if (lastTransformModel != null) diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index 8038294398..41048000d8 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -27,13 +27,25 @@ public sealed class Input public string CustomSchema = null; } + [TlcModule.EntryPointKind(typeof(ILearningPipelineLoader))] + public sealed class LoaderInput + { + [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] + public IFileHandle InputFile; + + [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] + public TextLoader.Arguments Arguments = new TextLoader.Arguments(); + } + public sealed class Output { [TlcModule.Output(Desc = "The resulting data view", SortOrder = 1)] public IDataView Data; } - [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] +#pragma warning disable 0618 + [Obsolete("Use TextLoader instead.")] + [TlcModule.EntryPoint(Name = "Data.CustomTextLoader", Desc = "Import a dataset from a text file")] public static Output ImportText(IHostEnvironment env, Input input) { Contracts.CheckValue(env, nameof(env)); @@ -43,5 +55,17 @@ public static Output ImportText(IHostEnvironment env, Input input) var loader = host.CreateLoader(string.Format("Text{{{0}}}", input.CustomSchema), new FileHandleSource(input.InputFile)); return new Output { Data = loader }; } +#pragma warning restore 0618 + + [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] + public static Output TextLoader(IHostEnvironment env, LoaderInput input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("ImportTextData"); + env.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + var loader = host.CreateLoader(input.Arguments, new FileHandleSource(input.InputFile)); + return new Output { Data = loader }; + } } } diff --git a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs index 8b199045fa..40909ad108 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs @@ -93,7 +93,8 @@ public static string CreateStratificationColumn(IHost host, ref IDataView data, new HashJoinTransform.Arguments { Column = new[] { new HashJoinTransform.Column { Name = stratCol, Source = stratificationColumn } }, - Join = true + Join = true, + HashBits = 30 }, data); } diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index f1e45fa446..5fabb15840 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -177,6 +177,37 @@ public static string Capitalize(string s) return char.ToUpperInvariant(s[0]) + s.Substring(1); } + private static string GetCharAsString(char value) + { + switch (value) + { + case '\t': + return "\\t"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\\': + return 
"\\"; + case '\"': + return "\""; + case '\'': + return "\\'"; + case '\0': + return "\\0"; + case '\a': + return "\\a"; + case '\b': + return "\\b"; + case '\f': + return "\\f"; + case '\v': + return "\\v"; + default: + return value.ToString(); + } + } + public static string GetValue(ModuleCatalog catalog, Type fieldType, object fieldValue, Dictionary typesSymbolTable, string rootNameSpace = "") { @@ -264,7 +295,7 @@ public static string GetValue(ModuleCatalog catalog, Type fieldType, object fiel case TlcModule.DataKind.Enum: return GetEnumName(fieldType, typesSymbolTable, rootNameSpace) + "." + fieldValue; case TlcModule.DataKind.Char: - return $"'{(char)fieldValue}'"; + return $"'{GetCharAsString((char)fieldValue)}'"; case TlcModule.DataKind.Component: var type = fieldValue.GetType(); ModuleCatalog.ComponentInfo componentInfo; @@ -685,7 +716,7 @@ private void GenerateStructs(IndentingTextWriter writer, classBase = $" : OneToOneColumn<{_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}>, IOneToOneColumn"; else if (type.IsSubclassOf(typeof(ManyToOneColumn))) classBase = $" : ManyToOneColumn<{_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}>, IManyToOneColumn"; - writer.WriteLine($"public sealed class {_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}{classBase}"); + writer.WriteLine($"public sealed partial class {_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}{classBase}"); writer.WriteLine("{"); writer.Indent(); GenerateInputFields(writer, type, catalog, _typesSymbolTable); @@ -696,6 +727,58 @@ private void GenerateStructs(IndentingTextWriter writer, } } + private void GenerateLoaderAddInputMethod(IndentingTextWriter writer, string className) + { + //Constructor. + writer.WriteLine("[JsonIgnore]"); + writer.WriteLine("private string _inputFilePath = null;"); + writer.WriteLine($"public {className}(string filePath)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("_inputFilePath = filePath;"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //SetInput. + writer.WriteLine($"public void SetInput(IHostEnvironment env, Experiment experiment)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false);"); + writer.WriteLine("experiment.SetInput(InputFile, inputFile);"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //Apply. + writer.WriteLine($"public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("Contracts.Assert(previousStep == null);"); + writer.WriteLine(""); + writer.WriteLine($"return new {className}PipelineStep(experiment.Add(this));"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //Pipelinestep class. 
+ writer.WriteLine($"private class {className}PipelineStep : ILearningPipelineDataStep"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine($"public {className}PipelineStep (Output output)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("Data = output.Data;"); + writer.WriteLine("Model = null;"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(); + writer.WriteLine("public Var Data { get; }"); + writer.WriteLine("public Var Model { get; }"); + writer.Outdent(); + writer.WriteLine("}"); + } + private void GenerateColumnAddMethods(IndentingTextWriter writer, Type inputType, ModuleCatalog catalog, @@ -842,10 +925,11 @@ private void GenerateInput(IndentingTextWriter writer, var classAndMethod = GeneratorUtils.GetClassAndMethodNames(entryPointInfo); string classBase = ""; if (entryPointInfo.InputKinds != null) + { classBase += $" : {string.Join(", ", entryPointInfo.InputKinds.Select(GeneratorUtils.GetCSharpTypeName))}"; - - if (classBase.Contains("ITransformInput") || classBase.Contains("ITrainerInput")) - classBase += ", Microsoft.ML.ILearningPipelineItem"; + if (entryPointInfo.InputKinds.Any(t => typeof(ITrainerInput).IsAssignableFrom(t) || typeof(ITransformInput).IsAssignableFrom(t))) + classBase += ", Microsoft.ML.ILearningPipelineItem"; + } GenerateEnums(writer, entryPointInfo.InputType, classAndMethod.Item1); writer.WriteLine(); @@ -854,10 +938,17 @@ private void GenerateInput(IndentingTextWriter writer, foreach (var line in entryPointInfo.Description.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)) writer.WriteLine($"/// {line}"); writer.WriteLine("/// "); + + if(entryPointInfo.ObsoleteAttribute != null) + writer.WriteLine($"[Obsolete(\"{entryPointInfo.ObsoleteAttribute.Message}\")]"); + writer.WriteLine($"public sealed partial class {classAndMethod.Item2}{classBase}"); writer.WriteLine("{"); writer.Indent(); writer.WriteLine(); + if (entryPointInfo.InputKinds != null && entryPointInfo.InputKinds.Any(t => typeof(ILearningPipelineLoader).IsAssignableFrom(t))) + GenerateLoaderAddInputMethod(writer, classAndMethod.Item2); + GenerateColumnAddMethods(writer, entryPointInfo.InputType, catalog, classAndMethod.Item2, out Type transformType); writer.WriteLine(); GenerateInputFields(writer, entryPointInfo.InputType, catalog, _typesSymbolTable); diff --git a/src/Microsoft.ML/TextLoader.cs b/src/Microsoft.ML/TextLoader.cs deleted file mode 100644 index 4e3e3fb8e4..0000000000 --- a/src/Microsoft.ML/TextLoader.cs +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Api; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints; -using System; -using System.Linq; -using System.Reflection; -using System.Text; - -namespace Microsoft.ML -{ - public class TextLoader : ILearningPipelineLoader - { - private string _inputFilePath; - private string CustomSchema; - private Data.TextLoader ImportTextInput; - - /// - /// Construct a TextLoader object - /// - /// Data file path - /// Does the file contains header? - /// How the columns are seperated? - /// Options: separator="tab", separator="space", separator="comma" or separator=[single character]. 
- /// By default separator=null means "tab" - /// Whether the input may include quoted values, - /// which can contain separator characters, colons, - /// and distinguish empty values from missing values. When true, consecutive separators - /// denote a missing value and an empty value is denoted by \"\". - /// When false, consecutive separators denote an empty value. - /// Whether the input may include sparse representations e.g. - /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero - /// except for 3rd and 5th columns which have values 6 and 3 - /// Remove trailing whitespace from lines - public TextLoader(string inputFilePath, bool useHeader = false, - string separator = null, bool allowQuotedStrings = true, - bool supportSparse = true, bool trimWhitespace = false) - { - _inputFilePath = inputFilePath; - SetCustomStringFromType(useHeader, separator, allowQuotedStrings, supportSparse, trimWhitespace); - } - - private IFileHandle GetTextLoaderFileHandle(IHostEnvironment env, string trainFilePath) => - new SimpleFileHandle(env, trainFilePath, false, false); - - private void SetCustomStringFromType(bool useHeader, string separator, - bool allowQuotedStrings, bool supportSparse, bool trimWhitespace) - { - StringBuilder schemaBuilder = new StringBuilder(CustomSchema); - foreach (var field in typeof(TInput).GetFields()) - { - var mappingAttr = field.GetCustomAttribute(); - if(mappingAttr == null) - throw Contracts.ExceptParam(field.Name, $"{field.Name} is missing ColumnAttribute"); - - schemaBuilder.AppendFormat("col={0}:{1}:{2} ", - mappingAttr.Name ?? field.Name, - TypeToName(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType), - mappingAttr.Ordinal); - } - - if (useHeader) - schemaBuilder.Append(nameof(TextLoader.Arguments.HasHeader)).Append("+ "); - - if (separator != null) - schemaBuilder.Append(nameof(TextLoader.Arguments.Separator)).Append("=").Append(separator).Append(" "); - - if (!allowQuotedStrings) - schemaBuilder.Append(nameof(TextLoader.Arguments.AllowQuoting)).Append("- "); - - if (!supportSparse) - schemaBuilder.Append(nameof(TextLoader.Arguments.AllowSparse)).Append("- "); - - if (trimWhitespace) - schemaBuilder.Append(nameof(TextLoader.Arguments.TrimWhitespace)).Append("+ "); - - schemaBuilder.Length--; - CustomSchema = schemaBuilder.ToString(); - } - - private string TypeToName(Type type) - { - if (type == typeof(string)) - return "TX"; - else if (type == typeof(float) || type == typeof(double)) - return "R4"; - else if (type == typeof(bool)) - return "BL"; - else - throw new System.NotSupportedException("Type ${type.FullName} is not implemented or supported."); //Add more types. 
- } - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - Contracts.Assert(previousStep == null); - - ImportTextInput = new Data.TextLoader(); - ImportTextInput.CustomSchema = CustomSchema; - var importOutput = experiment.Add(ImportTextInput); - return new TextLoaderPipelineStep(importOutput.Data); - } - - public void SetInput(IHostEnvironment env, Experiment experiment) - { - IFileHandle inputFile = GetTextLoaderFileHandle(env, _inputFilePath); - experiment.SetInput(ImportTextInput.InputFile, inputFile); - } - - private class TextLoaderPipelineStep : ILearningPipelineDataStep - { - public TextLoaderPipelineStep(Var data) - { - Data = data; - } - - public Var Data { get; } - public Var Model => null; - } - } -} diff --git a/src/Native/build.cmd b/src/Native/build.cmd index 166773183c..e2bbc3a4dc 100644 --- a/src/Native/build.cmd +++ b/src/Native/build.cmd @@ -27,7 +27,6 @@ shift goto :Arg_Loop :ToolsVersion - if defined VisualStudioVersion goto :RunVCVars set _VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" @@ -37,7 +36,8 @@ if exist %_VSWHERE% ( if not exist "%_VSCOMNTOOLS%" set _VSCOMNTOOLS=%VS140COMNTOOLS% if not exist "%_VSCOMNTOOLS%" goto :MissingVersion -set VSCMD_START_DIR="%__currentScriptDir%" + +set "VSCMD_START_DIR=%__currentScriptDir%" call "%_VSCOMNTOOLS%\VsDevCmd.bat" :RunVCVars @@ -92,8 +92,8 @@ if not exist "%__IntermediatesDir%" md "%__IntermediatesDir%" :: Regenerate the VS solution -set __gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat -set __source-code-path=%__currentScriptDir% +set "__gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat" +set "__source-code-path=%__currentScriptDir%" echo Calling "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% pushd "%__IntermediatesDir%" diff --git a/src/Native/build.proj b/src/Native/build.proj index 1bfab0639c..83efe29a46 100644 --- a/src/Native/build.proj +++ b/src/Native/build.proj @@ -44,7 +44,7 @@ - + diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index e0583f58b7..adfa42e50d 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -4,6 +4,7 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Running; +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; @@ -50,7 +51,7 @@ public void Setup() s_trainedModel = TrainCore(); IrisPrediction prediction = s_trainedModel.Predict(s_example); - var testData = new TextLoader(s_dataPath, useHeader: true, separator: "tab"); + var testData = new TextLoader(s_dataPath).CreateFrom(useHeader: true); var evaluator = new ClassificationEvaluator(); s_metrics = evaluator.Evaluate(s_trainedModel, testData); @@ -70,7 +71,7 @@ private static PredictionModel TrainCore() { var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(s_dataPath, useHeader: true, separator: "tab")); + pipeline.Add(new TextLoader(s_dataPath).CreateFrom(useHeader: true)); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index ee4f56c260..439dc069f4 100644 --- 
a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -36,7 +36,7 @@ public void TestSimpleExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var normalizeInput = new ML.Transforms.MinMaxNormalizer @@ -67,7 +67,7 @@ public void TestSimpleTrainExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var catInput = new ML.Transforms.CategoricalOneHotVectorizer @@ -165,7 +165,7 @@ public void TestTrainTestMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var trainTestInput = new ML.Models.TrainTestBinaryEvaluator @@ -235,7 +235,7 @@ public void TestCrossValidationBinaryMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var crossValidateBinary = new ML.Models.BinaryCrossValidator @@ -295,7 +295,7 @@ public void TestCrossValidationMacro() var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator @@ -330,5 +330,73 @@ public void TestCrossValidationMacro() } } } + + [Fact] + public void TestCrossValidationMacroWithStratification() + { + var dataPath = GetDataPath(@"breast-cancer.txt"); + using (var env = new TlcEnvironment()) + { + var subGraph = env.CreateExperiment(); + + var nop = new ML.Transforms.NoOperation(); + var nopOutput = subGraph.Add(nop); + + var learnerInput = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier + { + TrainingData = nopOutput.OutputData, + NumThreads = 1 + }; + var learnerOutput = subGraph.Add(learnerInput); + + var modelCombine = new ML.Transforms.ManyHeterogeneousModelCombiner + { + TransformModels = new ArrayVar(nopOutput.Model), + PredictorModel = learnerOutput.PredictorModel + }; + var modelCombineOutput = subGraph.Add(modelCombine); + + var experiment = env.CreateExperiment(); + var importInput = new ML.Data.TextLoader(dataPath); + importInput.Arguments.Column = new ML.Data.TextLoaderColumn[] + { + new ML.Data.TextLoaderColumn { Name = "Label", Source = new[] { new ML.Data.TextLoaderRange(0) } }, + new ML.Data.TextLoaderColumn { Name = "Strat", Source = new[] { new ML.Data.TextLoaderRange(1) } }, + new ML.Data.TextLoaderColumn { Name = "Features", Source = new[] { new ML.Data.TextLoaderRange(2, 9) } } + }; + var importOutput = experiment.Add(importInput); + + var crossValidate = new ML.Models.CrossValidator + { + Data = importOutput.Data, + Nodes = subGraph, + TransformModel = null, + StratificationColumn = "Strat" + }; + crossValidate.Inputs.Data = nop.Data; + crossValidate.Outputs.Model = modelCombineOutput.PredictorModel; + var crossValidateOutput = experiment.Add(crossValidate); + + experiment.Compile(); + experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); + experiment.Run(); + var data = 
experiment.GetOutput(crossValidateOutput.OverallMetrics[0]); + + var schema = data.Schema; + var b = schema.TryGetColumnIndex("AUC", out int metricCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == metricCol)) + { + var getter = cursor.GetGetter(metricCol); + b = cursor.MoveNext(); + Assert.True(b); + double val = 0; + getter(ref val); + Assert.Equal(0.99, val, 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + } } } diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index e8be6c0370..24e8374b4c 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -33,7 +33,35 @@ public void EntryPointTrainTestSplit() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=Features:TX:1-9" }).Data; + /*var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input + { InputFile = inputFile, CustomSchema = "col=Label:0 col=Features:TX:1-9" }).Data;*/ + + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + SeparatorChars = new []{',' }, + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "Features", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 9} }, + Type = Runtime.Data.DataKind.Text + } + } + }, + + InputFile = inputFile + }).Data; var splitOutput = TrainTestSplit.Split(Env, new TrainTestSplit.Input { Data = dataView, Fraction = 0.9f }); @@ -62,7 +90,44 @@ public void EntryPointFeatureCombiner() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var result = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }).OutputData; var expected = Env.CreateTransform("Convert{col=F2 type=R4}", dataView); @@ -82,7 +147,44 @@ public void EntryPointScoring() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema 
= "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var trainData = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }); @@ -105,7 +207,44 @@ public void EntryPointApplyModel() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} }, + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var data1 = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }); @@ -120,7 +259,49 @@ public void EntryPointCaching() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + /*var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, + CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + */ + + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + SeparatorChars = new []{',' }, + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } 
+ } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var cached1 = Cache.CacheData(Env, new Cache.CacheInput() { Data = dataView, Caching = Cache.CachingType.Memory }); @@ -305,7 +486,7 @@ public void EntryPointOptionalParams() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file1' }, @@ -355,7 +536,7 @@ public void EntryPointExecGraphCommand() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -512,7 +693,7 @@ public void EntryPointParseColumns() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -562,7 +743,7 @@ public void EntryPointCountFeatures() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -607,7 +788,7 @@ public void EntryPointMutualSelectFeatures() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -653,7 +834,7 @@ public void EntryPointTextToKeyToText() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': 'sep=comma col=Cat:TX:4' @@ -735,7 +916,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file' }}, @@ -1214,7 +1395,7 @@ internal void TestEntryPointPipelineRoutine(string dataFile, string schema, stri {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': '{schema}' @@ -1287,7 +1468,7 @@ internal void TestEntryPointRoutine(string dataFile, string trainerName, string {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' {3} @@ -1459,7 +1640,7 @@ public void EntryPointNormalizeIfNeeded() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1522,7 +1703,7 @@ public void EntryPointTrainTestBinaryMacro() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1630,7 +1811,7 @@ public void EntryPointTrainTestMacroNoTransformInput() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1744,7 +1925,7 @@ public void EntryPointTrainTestMacro() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1843,7 +2024,7 @@ public void EntryPointChainedTrainTestMacros() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2019,7 +2200,7 @@ public void EntryPointChainedCrossValMacros() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2214,7 +2395,7 @@ public void EntryPointMacroEarlyExpansion() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2302,7 +2483,7 @@ public void EntryPointSerialization() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2368,7 +2549,7 @@ public void EntryPointNodeSchedulingFields() { 'Nodes': 
[ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'StageId': '5063dee8f19c4dd89a1fc3a9da5351a7', 'Inputs': { 'InputFile': '$file' @@ -2437,7 +2618,7 @@ public void EntryPointPrepareLabelConvertPredictedLabel() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': 'sep=comma col=Label:TX:4 col=Features:Num:0-3' @@ -2527,7 +2708,9 @@ public void EntryPointTreeLeafFeaturizer() { var dataPath = GetDataPath(@"adult.tiny.with-schema.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); +#pragma warning disable 0618 var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile }).Data; +#pragma warning restore 0618 var cat = Categorical.CatTransformDict(Env, new CategoricalTransform.Arguments() { Data = dataView, diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index 5166540ccd..3620a3580a 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -20,14 +20,14 @@ public TestAutoInference(ITestOutputHelper helper) { } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] [TestCategory("EntryPoints")] public void TestLearn() { using (var env = new TlcEnvironment()) { - string pathData = GetDataPath(@"../UCI/adult.train"); - string pathDataTest = GetDataPath(@"../UCI/adult.test"); + string pathData = GetDataPath(@"../../Samples/UCI/adult.train"); + string pathDataTest = GetDataPath(@"../../Samples/UCI/adult.test"); int numOfSampleRows = 1000; int batchSize = 5; int numIterations = 10; @@ -49,46 +49,107 @@ public void TestLearn() // Use best pipeline for another task var inputFileTrain = new SimpleFileHandle(env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data; var inputFileTest = new SimpleFileHandle(env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data; +#pragma warning restore 0618 // REVIEW: Theoretically, it could be the case that a new, very bad learner is introduced and // we get unlucky and only select it every time, such that this test fails. Not // likely at all, but a non-zero probability. Should be ok, since all current learners are returning d > .80. 
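A note on the #pragma warning disable 0618 pairs that start appearing around the remaining ImportTextData.ImportText call sites: CS0618 is the "member is obsolete" warning, so the legacy entry point has evidently been marked [Obsolete] in favor of the new TextLoader path, and tests that deliberately keep exercising it suppress the warning. A minimal self-contained sketch of the mechanics (the method and message below are illustrative stand-ins, not the real ML.NET signatures):

```csharp
using System;

static class LegacyCallSiteDemo
{
    // Stand-in for the deprecated entry point; only the CS0618 mechanics matter here.
    [Obsolete("Use the TextLoader entry point instead.")]
    public static string ImportText(string path) => "loaded " + path;

    public static void Main()
    {
        // Without the pragma pair, this call emits warning CS0618
        // ("member is obsolete"), which a warnings-as-errors build rejects.
#pragma warning disable 0618
        Console.WriteLine(ImportText("breast-cancer.txt"));
#pragma warning restore 0618
    }
}
```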
- double d = bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); - env.Check(d > 0.2); + bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, + out var testMetricValue, out var trainMetricValue); + env.Check(testMetricValue > 0.2); } Done(); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] + [TestCategory("EntryPoints")] + public void TestPipelineSweeperMacroNoTransforms() + { + // Set up inputs for experiment + string pathData = GetDataPath(@"../../Samples/UCI/adult.train"); + string pathDataTest = GetDataPath(@"../../Samples/UCI/adult.test"); + const int numOfSampleRows = 1000; + const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+"; + + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + const int batchSize = 5; + const int numIterations = 20; + const int numTransformLevels = 2; + AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + + // Using the simple, uniform random sampling (with replacement) engine + PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env); + + // Create search object + var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest); + + // Infer search space + amls.InferSearchSpace(numTransformLevels); + + // Create macro object + var pipelineSweepInput = new Microsoft.ML.Models.PipelineSweeper() + { + BatchSize = batchSize, + }; + + var exp = new Experiment(Env); + var output = exp.Add(pipelineSweepInput); + exp.Compile(); + exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain); + exp.SetInput(pipelineSweepInput.TestingData, datasetTest); + exp.SetInput(pipelineSweepInput.State, amls); + exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]); + exp.Run(); + + // Make sure you get back an AutoMlState, and that it ran for the correct number of iterations + // with at least minimal performance values (i.e., best should have AUC better than 0.1 on this dataset).
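(The assertions this comment announces follow immediately after this aside.) Two things worth flagging in the hunk above: RunTrainTestExperiment no longer returns a single double but reports both metrics through out parameters, and the sweeper macro is now driven through the Experiment API. The wiring, condensed into a sketch that reuses the test's own locals (not additional test code):

```csharp
// New signature: training and test metrics come back via out parameters.
bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric,
    MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer,
    out var testMetricValue, out var trainMetricValue);

// Macro wiring: add the node, compile, bind all four inputs, run, read back.
var exp = new Experiment(Env);
var output = exp.Add(pipelineSweepInput);
exp.Compile();
exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain);          // IDataView
exp.SetInput(pipelineSweepInput.TestingData, datasetTest);            // IDataView
exp.SetInput(pipelineSweepInput.State, amls);                         // pre-built AutoMlMlState
exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]);  // no seeded candidates
exp.Run();
var state = (AutoInference.AutoMlMlState)exp.GetOutput(output.State);
```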
+ AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State); + Assert.NotNull(amlsOut); + Assert.Equal(amlsOut.GetAllEvaluatedPipelines().Length, numIterations); + Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.1); + } + + [Fact] [TestCategory("EntryPoints")] public void EntryPointPipelineSweepSerialization() { // Get datasets - var pathData = GetDataPath(@"../UCI/adult.train"); - var pathDataTest = GetDataPath(@"../UCI/adult.test"); + var pathData = GetDataPath(@"../../Samples/UCI/adult.train"); + var pathDataTest = GetDataPath(@"../../Samples/UCI/adult.test"); const int numOfSampleRows = 1000; int numIterations = 10; const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { - 'Name': 'Commands.PipelineSweep', + 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', @@ -130,7 +191,8 @@ public void EntryPointPipelineSweepSerialization() var results = runner.GetOutput("ResultsOut"); Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId"); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); } @@ -143,12 +205,13 @@ public void EntryPointPipelineSweep() const int numOfSampleRows = 1000; int numIterations = 4; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); - +#pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { @@ -201,355 +264,344 @@ public void EntryPointPipelineSweep() var results = runner.GetOutput("ResultsOut"); Assert.NotNull(results); - var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId"); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); + Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); } - [Fact(Skip = "Datasets Not Present")] + [Fact] public void TestRocketPipelineEngine() { - //// Get datasets - //var pathData = GetDataPath(@"../UCI", "adult.train"); - //var pathDataTest = GetDataPath(@"../UCI", "adult.test"); - //const int numOfSampleRows = 1000; - //int numIterations = 35; - 
//const string schema = - //"sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - //"col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - //var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); - //var datasetTrain = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - //var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - //var datasetTest = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); - - //// Define entrypoint graph - //string inputGraph = @" - //{ - //'Nodes': [ - //{ - //'Name': 'Commands.PipelineSweep', - //'Inputs': { - //'TrainingData': '$TrainingData', - //'TestingData': '$TestingData', - //'StateArguments': { - //'Name': 'AutoMlState', - //'Settings': { - //'Metric': 'Auc', - //'Engine': { - //'Name': 'Rocket', - //'Settings' : { - //'TopKLearners' : 2, - //'SecondRoundTrialsPerLearner' : 5 - //}, - //}, - //'TerminatorArgs': { - //'Name': 'IterationLimited', - //'Settings': { - //'FinalHistoryLength': 35 - //} - //}, - //'TrainerKind': 'SignatureBinaryClassifierTrainer' - //} - //}, - //'BatchSize': 5 - //}, - //'Outputs': { - //'State': '$StateOut', - //'Results': '$ResultsOut' - //} - //}, - //] - //}"; - - //JObject graph = JObject.Parse(inputGraph); - //var catalog = ModuleCatalog.CreateInstance(Env); - - //var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - //runner.SetInput("TrainingData", datasetTrain); - //runner.SetInput("TestingData", datasetTest); - //runner.RunAll(); - - //var autoMlState = runner.GetOutput("StateOut"); - //Assert.IsNotNull(autoMlState); - //var allPipelines = autoMlState.GetAllEvaluatedPipelines(); - //var bestPipeline = autoMlState.GetBestPipeline(); - //Assert.AreEqual(allPipelines.Length, numIterations); - //Assert.IsTrue(bestPipeline.PerformanceSummary.MetricValue > 0.1); - - //var results = runner.GetOutput("ResultsOut"); - //Assert.IsNotNull(results); - //var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId"); - //Assert.IsTrue(rows.Length == numIterations); + // Get datasets + var pathData = GetDataPath(@"../../Samples/UCI", "adult.train"); + var pathDataTest = GetDataPath(@"../../Samples/UCI", "adult.test"); + const int numOfSampleRows = 1000; + int numIterations = 35; + const string schema = + "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 
'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'Rocket', + 'Settings' : { + 'TopKLearners' : 2, + 'SecondRoundTrialsPerLearner' : 5 + }, + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 35 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer' + } + }, + 'BatchSize': 5 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + + var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(allPipelines.Length, numIterations); + Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); + + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, + "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); + Assert.True(rows.Length == numIterations); } [Fact(Skip = "Need CoreTLC specific baseline update")] public void TestTextDatasetLearn() { - //using (var env = new TlcEnvironment()) - //{ - //string pathData = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv"); - //int batchSize = 5; - //int numIterations = 35; - //int numTransformLevels = 1; - //int numSampleRows = 100; - //AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.AccuracyMicro; - - //// Using the simple, uniform random sampling (with replacement) engine - //PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env); - - //// Test initial learning - //var amls = AutoInference.InferPipelines(env, autoMlEngine, pathData, "", out var _, numTransformLevels, batchSize, - //metric, out var _, numSampleRows, new IterationTerminator(numIterations), - //MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer); - //env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations); - //} - //Done(); - } + using (var env = new TlcEnvironment()) + { + string pathData = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv"); + int batchSize = 5; + int numIterations = 35; + int numTransformLevels = 1; + int numSampleRows = 100; + AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.AccuracyMicro; - [Fact(Skip = "Need CoreTLC specific baseline update")] - public void TestPipelineNodeCloning() - { - //using (var env = new TlcEnvironment()) - //{ - //var lr1 = RecipeInference - //.AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer) - //.First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("LogisticRegression")); - - //var sdca1 = RecipeInference - //.AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer) - //.First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("Sdca")); - - //// Clone and change hyperparam values - //var lr2 = lr1.Clone(); - //lr1.PipelineNode.SweepParams[0].RawValue = 1.2f; - //lr2.PipelineNode.SweepParams[0].RawValue = 3.5f; - //var sdca2 = sdca1.Clone(); - //sdca1.PipelineNode.SweepParams[0].RawValue = 3; - 
//sdca2.PipelineNode.SweepParams[0].RawValue = 0; - - //// Make sure the changes are propagated to entry point objects - //env.Check(lr1.PipelineNode.UpdateProperties()); - //env.Check(lr2.PipelineNode.UpdateProperties()); - //env.Check(sdca1.PipelineNode.UpdateProperties()); - //env.Check(sdca2.PipelineNode.UpdateProperties()); - //env.Check(lr1.PipelineNode.CheckEntryPointStateMatchesParamValues()); - //env.Check(lr2.PipelineNode.CheckEntryPointStateMatchesParamValues()); - //env.Check(sdca1.PipelineNode.CheckEntryPointStateMatchesParamValues()); - //env.Check(sdca2.PipelineNode.CheckEntryPointStateMatchesParamValues()); - - //// Make sure second object's set of changes didn't overwrite first object's - //env.Check(!lr1.PipelineNode.SweepParams[0].RawValue.Equals(lr2.PipelineNode.SweepParams[0].RawValue)); - //env.Check(!sdca2.PipelineNode.SweepParams[0].RawValue.Equals(sdca1.PipelineNode.SweepParams[0].RawValue)); - //} + // Using the simple, uniform random sampling (with replacement) engine + PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env); + + // Test initial learning + var amls = AutoInference.InferPipelines(env, autoMlEngine, pathData, "", out var _, numTransformLevels, batchSize, + metric, out var _, numSampleRows, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer); + env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations); + } + Done(); } - [Fact(Skip = "Need CoreTLC specific baseline update")] - public void TestSupportedMetricsByName() + [Fact] + public void TestPipelineNodeCloning() { - //var fields = - //typeof(AutoInference.SupportedMetric).GetMembers(BindingFlags.Static | BindingFlags.Public) - //.Where(s => s.MemberType == MemberTypes.Field); - //foreach (var field in fields) - //{ - //var metric = AutoInference.SupportedMetric.ByName(field.Name); - //Assert.IsTrue(metric?.Name == field.Name); - //} - + using (var env = new TlcEnvironment()) + { + var lr1 = RecipeInference + .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer) + .First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("LogisticRegression")); + + var sdca1 = RecipeInference + .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer) + .First(learner => learner.PipelineNode != null && learner.LearnerName.Contains("StochasticDualCoordinateAscent")); + + // Clone and change hyperparam values + var lr2 = lr1.Clone(); + lr1.PipelineNode.SweepParams[0].RawValue = 1.2f; + lr2.PipelineNode.SweepParams[0].RawValue = 3.5f; + var sdca2 = sdca1.Clone(); + sdca1.PipelineNode.SweepParams[0].RawValue = 3; + sdca2.PipelineNode.SweepParams[0].RawValue = 0; + + // Make sure the changes are propagated to entry point objects + env.Check(lr1.PipelineNode.UpdateProperties()); + env.Check(lr2.PipelineNode.UpdateProperties()); + env.Check(sdca1.PipelineNode.UpdateProperties()); + env.Check(sdca2.PipelineNode.UpdateProperties()); + env.Check(lr1.PipelineNode.CheckEntryPointStateMatchesParamValues()); + env.Check(lr2.PipelineNode.CheckEntryPointStateMatchesParamValues()); + env.Check(sdca1.PipelineNode.CheckEntryPointStateMatchesParamValues()); + env.Check(sdca2.PipelineNode.CheckEntryPointStateMatchesParamValues()); + + // Make sure second object's set of changes didn't overwrite first object's + env.Check(!lr1.PipelineNode.SweepParams[0].RawValue.Equals(lr2.PipelineNode.SweepParams[0].RawValue)); + 
env.Check(!sdca2.PipelineNode.SweepParams[0].RawValue.Equals(sdca1.PipelineNode.SweepParams[0].RawValue)); + } } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] public void TestHyperparameterFreezing() { - //string pathData = GetDataPath(@"../UCI", "adult.train"); - //int numOfSampleRows = 1000; - //int batchSize = 1; - //int numIterations = 10; - //int numTransformLevels = 3; - //AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; - - //// Using the simple, uniform random sampling (with replacement) brain - //PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - - //// Run initial experiments - //var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, - //metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), - //MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); - - //// Clear results - //amls.ClearEvaluatedPipelines(); - - //// Get space, remove transforms and all but one learner, freeze hyperparameters on learner. - //var space = amls.GetSearchSpace(); - //var transforms = space.Item1.Where(t => - //t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray(); - //var learners = new[] { space.Item2.First() }; - //var hyperParam = learners[0].PipelineNode.SweepParams.First(); - //var frozenParamValue = hyperParam.RawValue; - //hyperParam.Frozen = true; - //amls.UpdateSearchSpace(learners, transforms); - - //// Allow for one more iteration - //amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); - - //// Do learning. Only retained learner should be left in all pipelines. - //bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); - - //// Make sure all pipelines have retained learner - //Assert.IsTrue(amls.GetAllEvaluatedPipelines().All(p => p.Learner.LearnerName == learners[0].LearnerName)); - - //// Make sure hyperparameter value did not change - //Assert.IsNotNull(bestPipeline); - //Assert.AreEqual(bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue, frozenParamValue); + string pathData = GetDataPath(@"../../Samples/UCI", "adult.train"); + int numOfSampleRows = 1000; + int batchSize = 1; + int numIterations = 10; + int numTransformLevels = 3; + AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + + // Using the simple, uniform random sampling (with replacement) brain + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + + // Run initial experiments + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, + metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); + + // Clear results + amls.ClearEvaluatedPipelines(); + + // Get space, remove transforms and all but one learner, freeze hyperparameters on learner. + var space = amls.GetSearchSpace(); + var transforms = space.Item1.Where(t => + t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray(); + var learners = new[] { space.Item2.First() }; + var hyperParam = learners[0].PipelineNode.SweepParams.First(); + var frozenParamValue = hyperParam.RawValue; + hyperParam.Frozen = true; + amls.UpdateSearchSpace(learners, transforms); + + // Allow for one more iteration + amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); + + // Do learning. Only retained learner should be left in all pipelines. 
+ bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); + + // Make sure all pipelines have retained learner + Assert.True(amls.GetAllEvaluatedPipelines().All(p => p.Learner.LearnerName == learners[0].LearnerName)); + + // Make sure hyperparameter value did not change + Assert.NotNull(bestPipeline); + Assert.Equal(bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue, frozenParamValue); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact(Skip = "Dataset not available.")] public void TestRegressionPipelineWithMinimizingMetric() { - //string pathData = GetDataPath("../Housing (regression)/housing.txt"); - //int numOfSampleRows = 100; - //int batchSize = 5; - //int numIterations = 10; - //int numTransformLevels = 1; - //AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.L1; - - //// Using the simple, uniform random sampling (with replacement) brain - //PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - - //// Run initial experiments - //var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, - //metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), - //MacroUtils.TrainerKinds.SignatureRegressorTrainer); - - //// Allow for one more iteration - //amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); - - //// Do learning. Only retained learner should be left in all pipelines. - //bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); - - //// Make sure hyperparameter value did not change - //Assert.IsNotNull(bestPipeline); - //Assert.IsTrue(amls.GetAllEvaluatedPipelines().All( - //p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue)); + string pathData = GetDataPath("../Housing (regression)/housing.txt"); + int numOfSampleRows = 100; + int batchSize = 5; + int numIterations = 10; + int numTransformLevels = 1; + AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.L1; + + // Using the simple, uniform random sampling (with replacement) brain + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + + // Run initial experiments + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize, + metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations), + MacroUtils.TrainerKinds.SignatureRegressorTrainer); + + // Allow for one more iteration + amls.UpdateTerminator(new IterationTerminator(numIterations + 1)); + + // Do learning. Only retained learner should be left in all pipelines. 
+ bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); + + // Make sure hyperparameter value did not change + Assert.NotNull(bestPipeline); + Assert.True(amls.GetAllEvaluatedPipelines().All( + p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue)); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] public void TestLearnerConstrainingByName() { - //string pathData = GetDataPath(@"../UCI", "adult.train"); - //int numOfSampleRows = 1000; - //int batchSize = 1; - //int numIterations = 1; - //int numTransformLevels = 2; - //var prefix = "Microsoft.ML.Api.Experiment"; - //var retainedLearnerNames = new[] { $"{prefix}.LogisticRegression", $"{prefix}.FastTree" }; - //AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; - - //// Using the simple, uniform random sampling (with replacement) brain. - //PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); - - //// Run initial experiment. - //var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, - //numTransformLevels, batchSize, metric, out var _, numOfSampleRows, - //new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); - - //// Keep only logistic regression and FastTree. - //amls.KeepSelectedLearners(retainedLearnerNames); - //var space = amls.GetSearchSpace(); - - //// Make sure only learners left are those retained. - //Assert.AreEqual(retainedLearnerNames.Length, space.Item2.Length); - //Assert.IsTrue(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName))); + string pathData = GetDataPath(@"../../Samples/UCI", "adult.train"); + int numOfSampleRows = 1000; + int batchSize = 1; + int numIterations = 1; + int numTransformLevels = 2; + var retainedLearnerNames = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; + AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc; + + // Using the simple, uniform random sampling (with replacement) brain. + PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env); + + // Run initial experiment. + var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _, + numTransformLevels, batchSize, metric, out var _, numOfSampleRows, + new IterationTerminator(numIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer); + + // Keep only logistic regression and FastTree. + amls.KeepSelectedLearners(retainedLearnerNames); + var space = amls.GetSearchSpace(); + + // Make sure only learners left are those retained. 
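(The Assert lines this comment refers to continue right below.) A pattern shared by the re-enabled sweeper tests: learner names are now the short entry-point names rather than the old fully-qualified Microsoft.ML.Api.Experiment.* type names. The filtering call, isolated as a sketch using the test's amls state object:

```csharp
// Old style (from the previously commented-out bodies):
//   new[] { "Microsoft.ML.Api.Experiment.LogisticRegression",
//           "Microsoft.ML.Api.Experiment.FastTree" }
// New style: short entry-point names.
var retained = new[] { "LogisticRegressionBinaryClassifier", "FastTreeBinaryClassifier" };
amls.KeepSelectedLearners(retained); // search space keeps only these learners
var space = amls.GetSearchSpace();   // space.Item2 should now hold exactly two entries
```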
+ Assert.Equal(retainedLearnerNames.Length, space.Item2.Length); + Assert.True(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName))); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] public void TestRequestedLearners() { - //// Get datasets - //var pathData = GetDataPath(@"../UCI", "adult.train"); - //var pathDataTest = GetDataPath(@"../UCI", "adult.test"); - //const int numOfSampleRows = 100; - //const string schema = - //"sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - //"col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - //var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); - //var datasetTrain = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - //var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - //var datasetTest = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); - //var prefix = "Microsoft.ML.Api.Experiment"; - //var requestedLearners = new[] { $"{prefix}.LogisticRegression", $"{prefix}.FastTree" }; - - //// Define entrypoint graph - //string inputGraph = @" - //{ - //'Nodes': [ - //{ - //'Name': 'Commands.PipelineSweep', - //'Inputs': { - //'TrainingData': '$TrainingData', - //'TestingData': '$TestingData', - //'StateArguments': { - //'Name': 'AutoMlState', - //'Settings': { - //'Metric': 'Auc', - //'Engine': { - //'Name': 'Rocket', - //'Settings' : { - //'TopKLearners' : 2, - //'SecondRoundTrialsPerLearner' : 0 - //}, - //}, - //'TerminatorArgs': { - //'Name': 'IterationLimited', - //'Settings': { - //'FinalHistoryLength': 35 - //} - //}, - //'TrainerKind': 'SignatureBinaryClassifierTrainer', - //'RequestedLearners' : [ - //'Microsoft.ML.Api.Experiment.LogisticRegression', - //'Microsoft.ML.Api.Experiment.FastTree' - //] - //} - //}, - //'BatchSize': 5 - //}, - //'Outputs': { - //'State': '$StateOut', - //'Results': '$ResultsOut' - //} - //}, - //] - //}"; - - //JObject graph = JObject.Parse(inputGraph); - //var catalog = ModuleCatalog.CreateInstance(Env); - - //var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - //runner.SetInput("TrainingData", datasetTrain); - //runner.SetInput("TestingData", datasetTest); - //runner.RunAll(); - - //var autoMlState = runner.GetOutput("StateOut"); - //Assert.IsNotNull(autoMlState); - //var space = autoMlState.GetSearchSpace(); - - //// Make sure only learners left are those retained. 
- //Assert.AreEqual(requestedLearners.Length, space.Item2.Length); - //Assert.IsTrue(space.Item2.All(l => requestedLearners.Any(r => r == l.LearnerName))); + // Get datasets + var pathData = GetDataPath(@"../../Samples/UCI", "adult.train"); + var pathDataTest = GetDataPath(@"../../Samples/UCI", "adult.test"); + const int numOfSampleRows = 100; + const string schema = + "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + + "col=relationship:TX:7 col=race:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); + var requestedLearners = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; +#pragma warning restore 0618 + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'Rocket', + 'Settings' : { + 'TopKLearners' : 2, + 'SecondRoundTrialsPerLearner' : 0 + }, + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 35 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer', + 'RequestedLearners' : [ + 'LogisticRegressionBinaryClassifier', + 'FastTreeBinaryClassifier' + ] + } + }, + 'BatchSize': 5 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); + + var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); + + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var space = autoMlState.GetSearchSpace(); + + // Make sure only learners left are those retained. 
+ Assert.Equal(requestedLearners.Length, space.Item2.Length); + Assert.True(space.Item2.All(l => requestedLearners.Any(r => r == l.LearnerName))); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] public void TestMinimizingMetricTransformations() { - //var values = new[] { 100d, 10d, -2d, -1d, 5.8d, -3.1d }; - //var maxWeight = values.Max(); - //var processed = values.Select(v => AutoMlUtils.ProcessWeight(v, maxWeight, false)); - //var expectedResult = new[] { 0d, 90d, 102d, 101d, 94.2d, 103.1d }; + var values = new[] { 100d, 10d, -2d, -1d, 5.8d, -3.1d }; + var maxWeight = values.Max(); + var processed = values.Select(v => AutoMlUtils.ProcessWeight(v, maxWeight, false)); + var expectedResult = new[] { 0d, 90d, 102d, 101d, 94.2d, 103.1d }; - //Assert.IsTrue(processed.Select((x, idx) => Math.Abs(x - expectedResult[idx]) < 0.001).All(r => r)); + Assert.True(processed.Select((x, idx) => System.Math.Abs(x - expectedResult[idx]) < 0.001).All(r => r)); } } } diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index dca360c4e3..1b0ab4eb8e 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; @@ -40,24 +41,187 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream) public static IDataView GetKcHouseDataView(string dataPath) { - var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; - var txtArgs = new TextLoader.Arguments(); + var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 " + + "col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 " + + "col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 " + + "col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 " + + "col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 " + + "col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; + + var txtArgs = new Runtime.Data.TextLoader.Arguments(); bool parsed = CmdParser.ParseArguments(s_environment, dataSchema, txtArgs); s_environment.Assert(parsed); - var txtLoader = new TextLoader(s_environment, txtArgs, new MultiFileSource(dataPath)); + var txtLoader = new Runtime.Data.TextLoader(s_environment, txtArgs, new MultiFileSource(dataPath)); return txtLoader; } private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) { - var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; - Experiment experiment = s_environment.CreateExperiment(); - var importData = new Data.TextLoader(); 
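The re-enabled TestMinimizingMetricTransformations above pins down the weight transformation for minimizing metrics: the expected values imply that AutoMlUtils.ProcessWeight(v, max, false) behaves as max - v, flipping smaller-is-better into larger-is-better. A standalone check of that reading (the local ProcessWeight is a stand-in, not the library implementation); the ModelHelper.cs rewrite resumes below:

```csharp
using System;
using System.Linq;

static class WeightCheck
{
    // Stand-in mirroring the behavior implied by the expected values:
    // for minimizing metrics (isMaximizing == false) a raw value v maps
    // to maxWeight - v, so smaller-is-better becomes larger-is-better.
    static double ProcessWeight(double v, double maxWeight, bool isMaximizing)
        => isMaximizing ? v : maxWeight - v;

    public static void Main()
    {
        var values = new[] { 100d, 10d, -2d, -1d, 5.8d, -3.1d };
        var max = values.Max();
        var expected = new[] { 0d, 90d, 102d, 101d, 94.2d, 103.1d };
        var ok = values.Select(v => ProcessWeight(v, max, false))
                       .Zip(expected, (p, e) => Math.Abs(p - e) < 0.001)
                       .All(r => r);
        Console.WriteLine(ok); // True
    }
}
```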
- importData.CustomSchema = dataSchema; - Data.TextLoader.Output imported = experiment.Add(importData); + var importData = new Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { ',' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Id", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoaderColumn() + { + Name = "Date", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(2) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Bedrooms", + Source = new [] { new TextLoaderRange(3) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Bathrooms", + Source = new [] { new TextLoaderRange(4) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLiving", + Source = new [] { new TextLoaderRange(5) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLot", + Source = new [] { new TextLoaderRange(6) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Floors", + Source = new [] { new TextLoaderRange(7) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Waterfront", + Source = new [] { new TextLoaderRange(8) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "View", + Source = new [] { new TextLoaderRange(9) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Condition", + Source = new [] { new TextLoaderRange(10) }, + Type = Runtime.Data.DataKind.Num + }, + new TextLoaderColumn() + { + Name = "Grade", + Source = new [] { new TextLoaderRange(11) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftAbove", + Source = new [] { new TextLoaderRange(12) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftBasement", + Source = new [] { new TextLoaderRange(13) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "YearBuilt", + Source = new [] { new TextLoaderRange(14) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "YearRenovated", + Source = new [] { new TextLoaderRange(15) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Zipcode", + Source = new [] { new TextLoaderRange(16) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Lat", + Source = new [] { new TextLoaderRange(17) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Long", + Source = new [] { new TextLoaderRange(18) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLiving15", + Source = new [] { new TextLoaderRange(19) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLot15", + Source = new [] { new TextLoaderRange(20) }, + Type = Runtime.Data.DataKind.Num + }, + } + } + + //new Data.CustomTextLoader(); + // importData.CustomSchema = dataSchema; + // + }; + + Data.TextLoader.Output imported = experiment.Add(importData); var numericalConcatenate = new Transforms.ColumnConcatenator(); numericalConcatenate.Data = imported.Data; numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", 
"SqftLiving15", "SqftLot15"); diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs index 4519fc5285..3ccc36255f 100644 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ b/test/Microsoft.ML.Tests/LearningPipelineTests.cs @@ -5,6 +5,7 @@ using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; @@ -66,7 +67,7 @@ public void TransformOnlyPipeline() { const string _dataPath = @"..\..\Data\breast-cancer.txt"; var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_dataPath, useHeader: false)); + pipeline.Add(new ML.Data.TextLoader(_dataPath).CreateFrom(useHeader: false)); pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag }); var model = pipeline.Train(); var predictionModel = model.Predict(new InputData() { F1 = "5" }); @@ -95,7 +96,7 @@ public class Data public class Prediction { [ColumnName("PredictedLabel")] - public bool PredictedLabel; + public DvBool PredictedLabel; } [Fact] diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs index 38ec6ce073..31fc4fdd6d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.TestFramework; @@ -21,7 +22,7 @@ public void TrainAndPredictHousePriceModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: true, separator: ",")); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',')); pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); @@ -61,7 +62,7 @@ public void TrainAndPredictHousePriceModelTest() Assert.InRange(prediction.Price, 260_000, 330_000); string testDataPath = GetDataPath("kc_house_test.csv"); - var testData = new TextLoader(testDataPath, useHeader: true, separator: ","); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: true, separator: ','); var evaluator = new RegressionEvaluator(); RegressionMetrics metrics = evaluator.Evaluate(model, testData); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index de7c602047..5dcbf3a588 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; @@ -19,7 +20,7 @@ public void TrainAndPredictIrisModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false, separator: "tab")); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false)); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); @@ -66,7 +67,7 @@ public void TrainAndPredictIrisModelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. string testDataPath = GetDataPath("iris.txt"); - var testData = new TextLoader(testDataPath, useHeader: false, separator: "tab"); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 79cc2fc137..ebddc33b03 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; @@ -19,7 +20,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false, separator: ",")); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, separator: ',')); pipeline.Add(new Dictionarizer("Label")); // "IrisPlantType" is used as "Label" because of column attribute name on the field. @@ -69,7 +70,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. string testDataPath = GetDataPath("iris.data"); - var testData = new TextLoader(testDataPath, useHeader: false, separator: ","); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false, separator: ','); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 608cbef144..80947644e9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -2,9 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
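The loader migration in the scenario tests above follows one mechanical recipe, sketched here. Tab is evidently the CreateFrom default separator, which is why the iris calls drop separator: "tab" entirely, and comma now passes as a char rather than a string:

```csharp
// old: pipeline.Add(new TextLoader(dataPath, useHeader: false, separator: "tab"));
pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false));

// old: new TextLoader(testDataPath, useHeader: true, separator: ",");
var testData = new TextLoader(testDataPath).CreateFrom(useHeader: true, separator: ',');
```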
+using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System.Collections.Generic; @@ -23,7 +25,32 @@ public void TrainAndPredictSentimentModelTest() { string dataPath = GetDataPath(SentimentDataPath); var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: true, separator: "tab")); + + pipeline.Add(new Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { '\t' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + } + } + } + }); + pipeline.Add(new TextFeaturizer("Features", "SentimentText") { KeepDiacritics = false, @@ -56,12 +83,34 @@ public void TrainAndPredictSentimentModelTest() IEnumerable<SentimentPrediction> predictions = model.Predict(sentiments); Assert.Equal(2, predictions.Count()); - Assert.False(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); + Assert.True(predictions.ElementAt(0).Sentiment.IsFalse); + Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); string testDataPath = GetDataPath(SentimentTestPath); - var testData = new TextLoader(testDataPath, useHeader: true, separator: "tab"); - + var testData = new Data.TextLoader(testDataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { '\t' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + } + } + } + }; var evaluator = new BinaryClassificationEvaluator(); BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData); @@ -105,7 +154,7 @@ public class SentimentData public class SentimentPrediction { [ColumnName("PredictedLabel")] - public bool Sentiment; + public DvBool Sentiment; } } } diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 96075b625a..40c0b6525f 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information.
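Two column-addressing styles coexist after this patch: the entry-point TextLoader.Range with explicit Min/Max (in TestEntryPoints.cs earlier) and the Microsoft.ML.Data TextLoaderRange convenience constructor used in the sentiment and house-price loaders above. Assuming the single-argument constructor is shorthand for Min == Max, the two forms below address the same slot:

```csharp
// Entry-point style: explicit slot range (here a single slot, index 1).
var range = new TextLoader.Range() { Min = 1, Max = 1 };

// Microsoft.ML.Data style: single index; assumed equivalent to Min = 1, Max = 1.
var single = new TextLoaderRange(1);
```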
using Microsoft.ML; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; @@ -24,19 +25,19 @@ public TextLoaderTests(ITestOutputHelper output) [Fact] public void ConstructorDoesntThrow() { - Assert.NotNull(new TextLoader("fakeFile.txt")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: true)); - Assert.NotNull(new TextLoader("fakeFile.txt", separator: "tab")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", false, false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", supportSparse: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", allowQuotedStrings: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom()); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader:true)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom()); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false, trimWhitespace: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, allowQuotedStrings: false)); } [Fact] public void CanSuccessfullyApplyATransform() { - var loader = new TextLoader("fakeFile.txt"); + var loader = new Data.TextLoader("fakeFile.txt").CreateFrom(); using (var environment = new TlcEnvironment()) { @@ -53,7 +54,7 @@ public void CanSuccessfullyApplyATransform() public void CanSuccessfullyRetrieveQuotedData() { string dataPath = GetDataPath("QuotingData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, separator: ",", allowQuotedStrings: true, supportSparse: false); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: true, supportSparse: false); using (var environment = new TlcEnvironment()) { @@ -111,7 +112,7 @@ public void CanSuccessfullyRetrieveQuotedData() public void CanSuccessfullyRetrieveSparseData() { string dataPath = GetDataPath("SparseData.txt"); - var loader = new TextLoader(dataPath, useHeader: true, separator: "tab", allowQuotedStrings: false, supportSparse: true); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, allowQuotedStrings: false, supportSparse: true); using (var environment = new TlcEnvironment()) { @@ -176,7 +177,7 @@ public void CanSuccessfullyRetrieveSparseData() public void CanSuccessfullyTrimSpaces() { string dataPath = GetDataPath("TrimData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, separator: ",", allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); using (var environment = new TlcEnvironment()) { @@ -223,7 +224,7 @@ public void CanSuccessfullyTrimSpaces() [Fact] public void ThrowsExceptionWithPropertyName() { - Exception ex = Assert.Throws( () => new TextLoader("fakefile.txt") ); + Exception ex = Assert.Throws( () => new Data.TextLoader("fakefile.txt").CreateFrom() ); Assert.StartsWith("String1 is missing ColumnAttribute", ex.Message); }
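Finally, the ThrowsExceptionWithPropertyName test above implies that CreateFrom builds the loader schema by reflecting over the target POCO and requires a ColumnAttribute on every field, hence the "String1 is missing ColumnAttribute" message. A hypothetical well-formed input type under that reading (names and attribute shape assumed, not taken from the patch):

```csharp
using Microsoft.ML.Runtime.Api;

// Every field carries a ColumnAttribute naming its source column ordinal,
// so schema reflection in CreateFrom has nothing to complain about.
public class GoodInput
{
    [Column("0")]
    public string String1;

    [Column("1")]
    public float Number1;
}
```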