diff --git a/README.md b/README.md index 8551dafc..1ec683ab 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # NimbusML -`nimbusml` is a Python module that provides experimental Python bindings for [ML.NET](https://github.com/dotnet/machinelearning). +`nimbusml` is a Python module that provides Python bindings for [ML.NET](https://github.com/dotnet/machinelearning). ML.NET was originally developed in Microsoft Research and is used across many product groups in Microsoft like Windows, Bing, PowerPoint, Excel and others. `nimbusml` was built to enable data science teams that are more familiar with Python to take advantage of ML.NET's functionality and performance. diff --git a/build.cmd b/build.cmd index 8b6bf29b..8ed5005d 100644 --- a/build.cmd +++ b/build.cmd @@ -311,6 +311,14 @@ copy "%BuildOutputDir%%Configuration%\pybridge.pyd" "%__currentScriptDir%src\py if %PythonVersion% == 2.7 ( copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\*.dll" "%__currentScriptDir%src\python\nimbusml\internal\libs\" + :: remove dataprep dlls as its not supported in python 2.7 + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.DPrep.*" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Data.*" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.ProgramSynthesis.*" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.DataPrep.dll" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\ExcelDataReader.dll" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.WindowsAzure.Storage.dll" + del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Workbench.Messaging.SDK.dll" ) else ( for /F "tokens=*" %%A in (build/libs_win.txt) do copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\%%A" "%__currentScriptDir%src\python\nimbusml\internal\libs\" ) @@ -340,10 +348,15 @@ if "%InstallPythonPackages%" == "True" ( echo "#################################" echo "Installing python packages ... " echo "#################################" + call "%PythonExe%" -m pip install --upgrade pip call "%PythonExe%" -m pip install --upgrade nose pytest graphviz imageio pytest-cov "jupyter_client>=4.4.0" "nbconvert>=4.2.0" - if %PythonVersion% == 2.7 ( call "%PythonExe%" -m pip install --upgrade pyzmq ) - :: Run azureml-dataprep tests only in pyhon 3.7 as its an optional dependency - if %PythonVersion% == 3.7 ( call "%PythonExe%" -m pip install --upgrade azureml-dataprep ) + + if %PythonVersion% == 2.7 ( + call "%PythonExe%" -m pip install --upgrade pyzmq + ) else ( + call "%PythonExe%" -m pip install --upgrade "azureml-dataprep>=1.1.12" + ) + call "%PythonExe%" -m pip install --upgrade "%__currentScriptDir%target\%WheelFile%" call "%PythonExe%" -m pip install "scikit-learn==0.19.2" ) diff --git a/build.sh b/build.sh index b1300c3f..6d5221c9 100755 --- a/build.sh +++ b/build.sh @@ -52,7 +52,7 @@ while [ "$1" != "" ]; do __runTests=true __installPythonPackages=true ;; - --installPythonPackages) + --installpythonpackages) __installPythonPackages=true ;; --includeextendedtests) @@ -219,6 +219,19 @@ then ext=*.dylib fi cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/${ext} "${__currentScriptDir}/src/python/nimbusml/internal/libs/" + # Obtain "libtensorflow_framework.so.1", which is the upgraded version of "libtensorflow.so". This is required for tests TensorFlowScorer.py to pass in Linux distros with Python 2.7 + if [ ! 
"$(uname -s)" = "Darwin" ] + then + cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/libtensorflow_framework.so.1 "${__currentScriptDir}/src/python/nimbusml/internal/libs/" + fi + # remove dataprep dlls as its not supported in python 2.7 + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.DPrep.*" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.Data.*" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.ProgramSynthesis.*" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.DataPrep.dll" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/ExcelDataReader.dll" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.WindowsAzure.Storage.dll" + rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.Workbench.Messaging.SDK.dll" else libs_txt=libs_linux.txt if [ "$(uname -s)" = "Darwin" ] @@ -271,13 +284,14 @@ then if [ ${PythonVersion} = 2.7 ] then "${PythonExe}" -m pip install --upgrade pyzmq - elif [ ${PythonVersion} = 3.6 ] && [ "$(uname -s)" = "Darwin" ] - then - "${PythonExe}" -m pip install --upgrade pytest-remotedata - elif [ ${PythonVersion} = 3.7 ] - then - "${PythonExe}" -m pip install --upgrade azureml-dataprep - fi + else + if [ ${PythonVersion} = 3.6 ] && [ "$(uname -s)" = "Darwin" ] + then + "${PythonExe}" -m pip install --upgrade pytest-remotedata + fi + + "${PythonExe}" -m pip install --upgrade "azureml-dataprep>=1.1.12" + fi "${PythonExe}" -m pip install --upgrade "${Wheel}" "${PythonExe}" -m pip install "scikit-learn==0.19.2" fi diff --git a/build/libs_linux.txt b/build/libs_linux.txt index d53a5a84..6ce4cbed 100644 --- a/build/libs_linux.txt +++ b/build/libs_linux.txt @@ -12,6 +12,4 @@ libonnxruntime.so System.Drawing.Common.dll TensorFlow.NET.dll NumSharp.Core.dll -Microsoft.DataPrep.dll -Microsoft.DPrep.* Microsoft.ML.* diff --git a/build/libs_mac.txt b/build/libs_mac.txt index de7e27b3..85544169 100644 --- a/build/libs_mac.txt +++ b/build/libs_mac.txt @@ -12,6 +12,4 @@ libtensorflow_framework.1.dylib System.Drawing.Common.dll TensorFlow.NET.dll NumSharp.Core.dll -Microsoft.DataPrep.dll -Microsoft.DPrep.* Microsoft.ML.* diff --git a/build/libs_win.txt b/build/libs_win.txt index 62c1bab0..7ef9cca7 100644 --- a/build/libs_win.txt +++ b/build/libs_win.txt @@ -12,6 +12,4 @@ tensorflow.dll TensorFlow.NET.dll NumSharp.Core.dll System.Drawing.Common.dll -Microsoft.DataPrep.dll -Microsoft.DPrep.* Microsoft.ML.* diff --git a/docs/release-notes/release-1.4.0.md b/docs/release-notes/release-1.4.0.md new file mode 100644 index 00000000..1c30e978 --- /dev/null +++ b/docs/release-notes/release-1.4.0.md @@ -0,0 +1,57 @@ +# [NimbusML](https://docs.microsoft.com/en-us/nimbusml/overview) 1.4.0 + +## **New Features** + +- **Add initial implementation of DatasetTransformer.** + + [PR#240](https://github.com/microsoft/NimbusML/pull/240) + This transform allows a fitted transformer based model to be inserted + in to another `Pipeline`. + + ```python + Pipeline([ + DatasetTransformer(transform_model=transform_pipeline.model), + OnlineGradientDescentRegressor(label='c2', feature=['c1']) + ]) + ``` + +## **Bug Fixes** + +- **Fixed `classes_` attribute when no `y` input specified ** + + [PR#218](https://github.com/microsoft/NimbusML/pull/218) + Fix a bug with the classes_ attribute when no y input is specified during fitting. 
+  This addresses [issue 216](https://github.com/microsoft/NimbusML/issues/216).
+
+- **Added missing NumSharp.Core.dll.**
+
+  [PR#220](https://github.com/microsoft/NimbusML/pull/220)
+  Fixed a bug that prevented running TensorFlowScorer.
+  This addresses [issue 219](https://github.com/microsoft/NimbusML/issues/219).
+
+- **Enabled scoring of ML.NET models saved with the new TransformerChain format.**
+
+  [PR#230](https://github.com/microsoft/NimbusML/pull/230)
+  Fixed an error when loading a model that was saved with mlnet auto-train.
+  This addresses [issue 201](https://github.com/microsoft/NimbusML/issues/201).
+
+- **Pass the Python executable path to the DataPrep package.**
+
+  [PR#232](https://github.com/microsoft/NimbusML/pull/232)
+  Enables passing the Python executable to the DataPrep package, so DataPrep can execute Python transformations.
+
+## **Breaking Changes**
+
+None.
+
+## **Enhancements**
+
+None.
+
+## **Documentation and Samples**
+
+None.
+
+## **Remarks**
+
+None.
diff --git a/release-next.md b/release-next.md
index 68bfa7ef..031f060f 100644
--- a/release-next.md
+++ b/release-next.md
@@ -2,15 +2,91 @@
 ## **New Features**
 
-None.
+- **Initial implementation of `csr_matrix` output support.**
+
+  [PR#250](https://github.com/microsoft/NimbusML/pull/250)
+  Add support for data output in `scipy.sparse.csr_matrix` format.
+
+  ```python
+  xf = OneHotVectorizer(columns={'c0':'c0', 'c1':'c1'})
+  xf.fit(train_df)
+  result = xf.transform(train_df, as_csr=True)
+  ```
+
+- **Permutation Feature Importance for model interpretability.**
+
+  [PR#279](https://github.com/microsoft/NimbusML/pull/279)
+  Adds a `permutation_feature_importance()` method to `Pipeline` and
+  predictor estimators, enabling evaluation of model-wide feature
+  importances on any dataset with the same schema as the dataset used
+  to fit the `Pipeline`.
+
+  ```python
+  pipe = Pipeline([
+      LogisticRegressionBinaryClassifier(label='label', feature=['feature'])
+  ])
+  pipe.fit(data)
+  pipe.permutation_feature_importance(data)
+  ```
+
+- **Initial implementation of DateTime input and output column support.**
+
+  [PR#290](https://github.com/microsoft/NimbusML/pull/290)
+  Add initial support for input and output of Pandas DateTime columns.
+
+- **Initial implementation of LpScaler.**
+
+  [PR#253](https://github.com/microsoft/NimbusML/pull/253)
+  Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf).
+  Performs the following operation on a vector X: Y = (X - M) / D, where M is the mean and D
+  is either the L2, L1, or LInf norm.
+
+- **Add support for variable length vector output.**
+
+  [PR#267](https://github.com/microsoft/NimbusML/pull/267)
+  Support output of columns returned from ML.NET which contain variable-length vectors.
+
+- **Save `predictor_model` when pickling a `Pipeline`.**
+
+  [PR#295](https://github.com/microsoft/NimbusML/pull/295)
+
+- **Initial implementation of the WordTokenizer transform.**
+
+  [PR#296](https://github.com/microsoft/NimbusML/pull/296)
+
+- **Add support for summary output from tree-based predictors.**
+
+  [PR#298](https://github.com/microsoft/NimbusML/pull/298)
 
 ## **Bug Fixes**
 
-None.
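The `predictor_model` pickling change listed under New Features above (PR#295) has no example in these notes. The sketch below is illustrative only: it assumes the standard `nimbusml` import paths and uses a made-up toy DataFrame; `Pipeline` and `OnlineGradientDescentRegressor` are the same classes used in the DatasetTransformer example earlier in this patch.

```python
import pickle
import pandas as pd
from nimbusml import Pipeline
from nimbusml.linear_model import OnlineGradientDescentRegressor  # import path assumed

# Toy data: 'c1' is the feature column, 'c2' the regression label (illustrative only).
train_df = pd.DataFrame({'c1': [1.0, 2.0, 3.0, 4.0], 'c2': [2.0, 4.0, 6.0, 8.0]})

pipe = Pipeline([OnlineGradientDescentRegressor(label='c2', feature=['c1'])])
pipe.fit(train_df)

# With PR#295 the fitted predictor_model is included in the pickle,
# so the restored pipeline can predict without refitting.
restored = pickle.loads(pickle.dumps(pipe))
print(restored.predict(train_df))
```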
+- **Fixed `Pipeline.transform()` failing in a transform-only `Pipeline` when a y column is provided.**
+
+  [PR#294](https://github.com/microsoft/NimbusML/pull/294)
+  Enables calling `.transform()` on a `Pipeline` containing only transforms when the y column is provided.
+
+- **Fixed an issue when using `predict_proba` or `decision_function` with combined models.**
+
+  [PR#272](https://github.com/microsoft/NimbusML/pull/272)
+
+- **Fixed `Pipeline._extract_classes_from_headers` not checking for valid steps.**
+
+  [PR#292](https://github.com/microsoft/NimbusML/pull/292)
+
+- **Fixed BinaryDataStream not being accepted as input to a transformer.**
+
+  [PR#307](https://github.com/microsoft/NimbusML/pull/307)
+
+- **Fixed the casing of the installPythonPackages build.sh argument.**
+
+  [PR#256](https://github.com/microsoft/NimbusML/pull/256)
 
 ## **Breaking Changes**
 
-None.
+- **Removed the `y` parameter from `Pipeline.transform()`.**
+
+  [PR#294](https://github.com/microsoft/NimbusML/pull/294)
+  Removed the `y` parameter from `Pipeline.transform()`, as it is neither needed nor used when transforming data with a fitted `Pipeline`.
 
 ## **Enhancements**
diff --git a/src/DotNetBridge/Bridge.cs b/src/DotNetBridge/Bridge.cs
index 40220cc8..a7954355 100644
--- a/src/DotNetBridge/Bridge.cs
+++ b/src/DotNetBridge/Bridge.cs
@@ -7,10 +7,8 @@ using System.Runtime.InteropServices;
 using System.Text;
 using System.Threading;
-using Microsoft.ML;
 using Microsoft.ML.Data;
 using Microsoft.ML.EntryPoints;
-using Microsoft.ML.Model.OnnxConverter;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Trainers;
 using Microsoft.ML.Trainers.Ensemble;
@@ -19,7 +17,7 @@ using Microsoft.ML.Transforms;
 using Microsoft.ML.Transforms.TimeSeries;
 
-namespace Microsoft.MachineLearning.DotNetBridge
+namespace Microsoft.ML.DotNetBridge
 {
     ///
     /// The main entry point from native code. Note that GC / lifetime issues are critical to get correct.
     ///
@@ -130,51 +128,51 @@ public unsafe static partial class Bridge
         // For setting bool values to NativeBridge.
         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        private unsafe delegate void BLSetter(EnvironmentBlock* penv, int col, long index, byte value);
+        private unsafe delegate void BLSetter(EnvironmentBlock* penv, int col, long m, long n, byte value);
 
         // For setting float values to NativeBridge.
         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        private unsafe delegate void R4Setter(EnvironmentBlock* penv, int col, long index, float value);
+        private unsafe delegate void R4Setter(EnvironmentBlock* penv, int col, long m, long n, float value);
 
         // For setting double values to NativeBridge.
         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        private unsafe delegate void R8Setter(EnvironmentBlock* penv, int col, long index, double value);
+        private unsafe delegate void R8Setter(EnvironmentBlock* penv, int col, long m, long n, double value);
 
         // For setting I1 values to NativeBridge.
         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        private unsafe delegate void I1Setter(EnvironmentBlock* penv, int col, long index, sbyte value);
+        private unsafe delegate void I1Setter(EnvironmentBlock* penv, int col, long m, long n, sbyte value);
 
         // For setting I2 values to NativeBridge.
         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        private unsafe delegate void I2Setter(EnvironmentBlock* penv, int col, long index, short value);
+        private unsafe delegate void I2Setter(EnvironmentBlock* penv, int col, long m, long n, short value);
 
         // For setting I4 values to NativeBridge.
[UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void I4Setter(EnvironmentBlock* penv, int col, long index, int value); + private unsafe delegate void I4Setter(EnvironmentBlock* penv, int col, long m, long n, int value); // For setting I8 values to NativeBridge. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void I8Setter(EnvironmentBlock* penv, int col, long index, long value); + private unsafe delegate void I8Setter(EnvironmentBlock* penv, int col, long m, long n, long value); // For setting U1 values to NativeBridge. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void U1Setter(EnvironmentBlock* penv, int col, long index, byte value); + private unsafe delegate void U1Setter(EnvironmentBlock* penv, int col, long m, long n, byte value); // For setting U2 values to NativeBridge. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void U2Setter(EnvironmentBlock* penv, int col, long index, ushort value); + private unsafe delegate void U2Setter(EnvironmentBlock* penv, int col, long m, long n, ushort value); // For setting U4 values to NativeBridge. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void U4Setter(EnvironmentBlock* penv, int col, long index, uint value); + private unsafe delegate void U4Setter(EnvironmentBlock* penv, int col, long m, long n, uint value); // For setting U8 values to NativeBridge. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void U8Setter(EnvironmentBlock* penv, int col, long index, ulong value); + private unsafe delegate void U8Setter(EnvironmentBlock* penv, int col, long m, long n, ulong value); // For setting string values, to a generic pointer and index. [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private unsafe delegate void TXSetter(EnvironmentBlock* penv, int col, long index, sbyte* pch, int cch); + private unsafe delegate void TXSetter(EnvironmentBlock* penv, int col, long m, long n, sbyte* pch, int cch); // For setting string key values, to a generic pointer and index. [UnmanagedFunctionPointer(CallingConvention.StdCall)] @@ -186,12 +184,6 @@ private enum FnId Generic = 2, } -#if !CORECLR - // The hosting code invokes this to get a specific entry point. - [UnmanagedFunctionPointer(CallingConvention.StdCall)] - private delegate IntPtr NativeFnGetter(FnId id); -#endif - #region Callbacks to native // Call back to provide messages to native code. @@ -236,8 +228,9 @@ private struct EnvironmentBlock [FieldOffset(0x18)] public readonly void* modelSink; + //Max slots to return for vector valued columns(<=0 to return all). [FieldOffset(0x20)] - public readonly int maxThreadsAllowed; + public readonly int maxSlots; // Call back to provide cancel flag. 
[FieldOffset(0x28)] @@ -252,41 +245,14 @@ private struct EnvironmentBlock [UnmanagedFunctionPointer(CallingConvention.StdCall)] private unsafe delegate int NativeGeneric(EnvironmentBlock* penv, sbyte* psz, int cdata, DataSourceBlock** ppdata); -#if !CORECLR - private static NativeFnGetter FnGetter; -#endif private static NativeGeneric FnGeneric; private static TDel MarshalDelegate(void* pv) { Contracts.Assert(typeof(TDel).IsSubclassOf(typeof(Delegate))); Contracts.Assert(pv != null); -#if CORECLR return Marshal.GetDelegateForFunctionPointer((IntPtr)pv); -#else - return (TDel)(object)Marshal.GetDelegateForFunctionPointer((IntPtr)pv, typeof(TDel)); -#endif - } - -#if !CORECLR - /// - /// This is the bootstrapping entry point. It's labeled private but is actually invoked from the native - /// code to poke the address of the FnGetter callback into the address encoded in the string parameter. - /// This odd way of doing things is because the most convenient way to call an initial managed method - /// imposes the signature of Func{string, int}, which doesn't allow us to return a function adress. - /// - private static unsafe int GetFnGetterCallback(string addr) - { - if (FnGetter == null) - Interlocked.CompareExchange(ref FnGetter, (NativeFnGetter)GetFn, null); - long a = long.Parse(addr); - IntPtr* p = null; - IntPtr** pp = &p; - *(long*)pp = a; - *p = Marshal.GetFunctionPointerForDelegate(FnGetter); - return 1; } -#endif /// /// This is the main FnGetter function. Given an FnId value, it returns a native-callable @@ -334,6 +300,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd //env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly); //env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly); env.ComponentCatalog.RegisterAssembly(typeof(SsaChangePointDetector).Assembly); + env.ComponentCatalog.RegisterAssembly(typeof(DotNetBridgeEntrypoints).Assembly); using (var ch = host.Start("Executing")) { @@ -397,7 +364,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd // Wrap the data sets. ch.Trace("Wrapping native data sources"); ch.Trace("Executing"); - ExecCore(penv, host, ch, graph, cdata, ppdata); + RunGraphCore(penv, host, graph, cdata, ppdata); } catch (Exception e) { @@ -420,24 +387,6 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd return 0; } - private static void CheckModel(IHost host, byte** ppModelBin, long* pllModelBinLen, int i) - { - host.CheckParam( - ppModelBin != null && ppModelBin[i] != null - && pllModelBinLen != null && pllModelBinLen[i] > 0, "pModelBin", "Model is missing"); - } - - private static void ExecCore(EnvironmentBlock* penv, IHost host, IChannel ch, string graph, int cdata, DataSourceBlock** ppdata) - { - Contracts.AssertValue(ch); - ch.AssertValue(host); - ch.AssertNonEmpty(graph); - ch.Assert(cdata >= 0); - ch.Assert(ppdata != null || cdata == 0); - - RunGraphCore(penv, host, graph, cdata, ppdata); - } - /// /// Convert UTF8 bytes with known length to ROM. Negative length unsupported. 
/// @@ -483,25 +432,7 @@ internal static string BytesToString(sbyte* psz) if (cch == 0) return null; -#if CORECLR - return Encoding.UTF8.GetString((byte*)psz, cch); -#else - if (cch <= 0) - return ""; - - var decoder = Encoding.UTF8.GetDecoder(); - var chars = new char[decoder.GetCharCount((byte*)psz, cch, true)]; - int bytesUsed; - int charsUsed; - bool complete; - fixed (char* pchars = chars) - decoder.Convert((byte*)psz, cch, pchars, chars.Length, true, out bytesUsed, out charsUsed, out complete); - Contracts.Assert(bytesUsed == cch); - Contracts.Assert(charsUsed == chars.Length); - Contracts.Assert(complete); - return new string(chars); -#endif } /// diff --git a/src/DotNetBridge/DotNetBridge.csproj b/src/DotNetBridge/DotNetBridge.csproj index dd0e2c5d..822db6aa 100644 --- a/src/DotNetBridge/DotNetBridge.csproj +++ b/src/DotNetBridge/DotNetBridge.csproj @@ -16,6 +16,7 @@ https://github.com/Microsoft/NimbusML true DotNetBridge.snk + latest @@ -31,19 +32,19 @@ all runtime; build; native; contentfiles; analyzers - - - - - - - - - - - + + + + + + + + + + + - + diff --git a/src/DotNetBridge/Entrypoints.cs b/src/DotNetBridge/Entrypoints.cs new file mode 100644 index 00000000..9be84e67 --- /dev/null +++ b/src/DotNetBridge/Entrypoints.cs @@ -0,0 +1,182 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Microsoft.ML; +using Microsoft.ML.CommandLine; +using Microsoft.ML.DotNetBridge; +using Microsoft.ML.Data; +using Microsoft.ML.EntryPoints; +using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms; + +[assembly: LoadableClass(typeof(void), typeof(DotNetBridgeEntrypoints), null, typeof(SignatureEntryPointModule), "DotNetBridgeEntrypoints")] + +[assembly: LoadableClass(VariableColumnTransform.Summary, typeof(VariableColumnTransform), null, typeof(SignatureLoadDataTransform), + "", VariableColumnTransform.LoaderSignature)] + +namespace Microsoft.ML.DotNetBridge +{ + internal static class DotNetBridgeEntrypoints + { + [TlcModule.EntryPoint(Name = "Transforms.PrefixColumnConcatenator", Desc = ColumnConcatenatingTransformer.Summary, + UserName = ColumnConcatenatingTransformer.UserName, ShortName = ColumnConcatenatingTransformer.LoadName)] + public static CommonOutputs.TransformOutput ConcatColumns(IHostEnvironment env, ColumnCopyingTransformer.Options input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("PrefixConcatColumns"); + host.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + + // Get all column names with preserving order. 
+ var colNames = new List(input.Data.Schema.Count); + for (int i = 0; i < input.Data.Schema.Count; i++) + colNames.Add(input.Data.Schema[i].Name); + + // Iterate throuh input options, find matching source columns, create new input options + var inputOptions = new ColumnConcatenatingTransformer.Options() { Data = input.Data }; + var columns = new List(input.Columns.Length); + foreach (var col in input.Columns) + { + var newCol = new ColumnConcatenatingTransformer.Column(); + newCol.Name = col.Name; + var prefix = col.Source; + newCol.Source = colNames.Where(x => x.StartsWith(prefix, StringComparison.InvariantCulture)).ToArray(); + if (newCol.Source.Length == 0) + throw new ArgumentOutOfRangeException("No matching columns found for prefix: " + prefix); + + columns.Add(newCol); + } + inputOptions.Columns = columns.ToArray(); + + var xf = ColumnConcatenatingTransformer.Create(env, inputOptions, inputOptions.Data); + return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, xf, inputOptions.Data), OutputData = xf }; + } + + public sealed class TransformModelInput + { + [Argument(ArgumentType.Required, HelpText = "The transform model.", SortOrder = 1)] + public TransformModel Model; + } + + public sealed class ModelSchemaOutput + { + [TlcModule.Output(Desc = "The model schema", SortOrder = 1)] + public IDataView Schema; + } + + [TlcModule.EntryPoint(Name = "Models.Schema", Desc = "Retrieve output model schema")] + public static ModelSchemaOutput GetSchema(IHostEnvironment env, TransformModelInput input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("GetSchema"); + host.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + + return new ModelSchemaOutput { Schema = new EmptyDataView(host, input.Model.OutputSchema) }; + } + + [TlcModule.EntryPoint(Name = "Transforms.VariableColumnTransform", Desc = VariableColumnTransform.Summary, + UserName = "Variable Column Creator", ShortName = "Variable Column Creator")] + public static CommonOutputs.TransformOutput CreateVariableColumn(IHostEnvironment env, VariableColumnTransform.Options inputOptions) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("VariableColumnCreator"); + EntryPointUtils.CheckInputArgs(host, inputOptions); + + var xf = VariableColumnTransform.Create(env, inputOptions, inputOptions.Data); + return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, xf, inputOptions.Data), OutputData = xf }; + } + + public sealed class ScoringTransformInput + { + [Argument(ArgumentType.Required, HelpText = "The dataset to be scored", SortOrder = 1)] + public IDataView Data; + + [Argument(ArgumentType.Required, HelpText = "The predictor model to apply to data", SortOrder = 2)] + public PredictorModel PredictorModel; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Suffix to append to the score columns", SortOrder = 3)] + public string Suffix; + } + + public sealed class ScoringTransformOutput + { + [TlcModule.Output(Desc = "The scored dataset", SortOrder = 1)] + public IDataView ScoredData; + + [TlcModule.Output(Desc = "The scoring transform", SortOrder = 2)] + public TransformModel ScoringTransform; + } + + private static bool AreSchemasCompatible(DataViewSchema schema1, DataViewSchema schema2) + { + if (schema1 == null) + return schema2 == null; + if (schema2 == null) + return schema1 == null; + if (schema1.Count != schema2.Count) + return false; + + for (int i = 0; i < schema1.Count; i++) + { + if(schema1[i].Type != schema2[i].Type) + 
return false; + } + + return true; + } + + [TlcModule.EntryPoint(Name = "Transforms.DatasetScorerEx", Desc = "Score a dataset with a predictor model")] + public static ScoringTransformOutput Score(IHostEnvironment env, ScoringTransformInput input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("ScoreModel"); + host.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + + RoleMappedData data; + IPredictor predictor; + var inputData = input.Data; + try + { + input.PredictorModel.PrepareData(host, inputData, out data, out predictor); + } + catch (Exception) + { + // this can happen in csr_matrix case, try to use only trainer model. + host.Assert(inputData.Schema.Count == 1); + var inputColumnName = inputData.Schema[0].Name; + var trainingSchema = input.PredictorModel.GetTrainingSchema(host); + // get feature vector item type. + var trainingFeatureColumn = (DataViewSchema.Column)trainingSchema.Feature; + var requiredType = trainingFeatureColumn.Type.GetItemType().RawType; + var featuresColumnName = trainingFeatureColumn.Name; + predictor = input.PredictorModel.Predictor; + var xf = new TypeConvertingTransformer(host, + new TypeConvertingEstimator.ColumnOptions(featuresColumnName, requiredType, inputColumnName)).Transform(inputData); + data = new RoleMappedData(xf, null, featuresColumnName); + } + + IDataView scoredPipe; + using (var ch = host.Start("Creating scoring pipeline")) + { + ch.Trace("Creating pipeline"); + var bindable = ScoreUtils.GetSchemaBindableMapper(host, predictor); + ch.AssertValue(bindable); + + var mapper = bindable.Bind(host, data.Schema); + var scorer = ScoreUtils.GetScorerComponent(host, mapper, input.Suffix); + scoredPipe = scorer.CreateComponent(host, data.Data, mapper, input.PredictorModel.GetTrainingSchema(host)); + } + + return + new ScoringTransformOutput + { + ScoredData = scoredPipe, + ScoringTransform = new TransformModelImpl(host, scoredPipe, inputData) + }; + + } + } +} diff --git a/src/DotNetBridge/MessageValidator.cs b/src/DotNetBridge/MessageValidator.cs index 2aa78c27..4243a45d 100644 --- a/src/DotNetBridge/MessageValidator.cs +++ b/src/DotNetBridge/MessageValidator.cs @@ -7,7 +7,7 @@ using System.Globalization; using Microsoft.ML.Runtime; -namespace Microsoft.MachineLearning.DotNetBridge +namespace Microsoft.ML.DotNetBridge { /// /// This is a temporary solution to validate the messages from ML.NET to nimbusml. diff --git a/src/DotNetBridge/NativeDataInterop.cs b/src/DotNetBridge/NativeDataInterop.cs index c9b70526..461beb3c 100644 --- a/src/DotNetBridge/NativeDataInterop.cs +++ b/src/DotNetBridge/NativeDataInterop.cs @@ -9,14 +9,17 @@ using System.Globalization; using System.Runtime.InteropServices; using System.Text; -using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Runtime; +using System.Buffers; -namespace Microsoft.MachineLearning.DotNetBridge +namespace Microsoft.ML.DotNetBridge { public unsafe static partial class Bridge { + const int UTF8_BUFFER_SIZE = 10 * 1024 * 1024; // 10 MB + const int INDICES_BUFFER_SIZE = 1024 * 1024; // 1 Mln + /// /// This is provided by the native code and represents a native data source. It provides schema /// information and call backs for iteration. 
@@ -30,20 +33,17 @@ private struct DataSourceBlock [FieldOffset(0x08)] public readonly long crow; [FieldOffset(0x10)] - public readonly long* ids; - [FieldOffset(0x18)] public readonly sbyte** names; - [FieldOffset(0x20)] + [FieldOffset(0x18)] public readonly InternalDataKind* kinds; - [FieldOffset(0x28)] + [FieldOffset(0x20)] public readonly long* keyCards; - [FieldOffset(0x30)] + [FieldOffset(0x28)] public readonly long* vecCards; - [FieldOffset(0x38)] - public readonly void** getters; - + [FieldOffset(0x30)] // Call back pointers. - [FieldOffset(0x40)] + public readonly void** getters; + [FieldOffset(0x38)] public readonly void* labelsGetter; #pragma warning restore 649 // never assigned } @@ -77,6 +77,12 @@ private struct DataViewBlock // key types. Zero means unbounded, -1 means not a key type. [FieldOffset(0x20)] public int* keyCards; + + // The number of values in each row of a column. + // A value count of 0 means that each row of the + // column is variable length. + [FieldOffset(0x28)] + public byte* valueCounts; } private struct ColumnMetadataInfo @@ -93,7 +99,7 @@ public ColumnMetadataInfo(bool expand, string[] slotNames, Dictionary infos = null) + private static unsafe void SendViewToNativeAsDataFrame(IChannel ch, EnvironmentBlock* penv, IDataView view, Dictionary infos = null) { Contracts.AssertValue(ch); Contracts.Assert(penv != null); @@ -108,14 +114,13 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, var dataSink = MarshalDelegate(penv->dataSink); var schema = view.Schema; - var colIndices = new List(); - var kindList = new List(); - var keyCardList = new List(); - var nameUtf8Bytes = new List(); - var nameIndices = new List(); - - var expandCols = new HashSet(); - var allNames = new HashSet(); + var colIndices = new List(1000); + var kindList = new ValueListBuilder(INDICES_BUFFER_SIZE); + var keyCardList = new ValueListBuilder(INDICES_BUFFER_SIZE); + var nameUtf8Bytes = new ValueListBuilder(UTF8_BUFFER_SIZE); + var nameIndices = new ValueListBuilder(INDICES_BUFFER_SIZE); + var expandCols = new HashSet(1000); + var valueCounts = new List(1000); for (int col = 0; col < schema.Count; col++) { @@ -129,11 +134,7 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, var kind = itemType.GetRawKind(); int keyCard; - if (fullType.GetValueCount() == 0) - { - throw ch.ExceptNotSupp("Column has variable length vector: " + - name + ". Not supported in python. Drop column before sending to Python"); - } + byte valueCount = (fullType.GetValueCount() == 0) ? (byte)0 : (byte)1; if (itemType is KeyDataViewType) { @@ -181,6 +182,7 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, case InternalDataKind.R8: case InternalDataKind.BL: case InternalDataKind.TX: + case InternalDataKind.DT: break; } keyCard = -1; @@ -201,63 +203,60 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, { Contracts.Assert(info.SlotNames.Length == nSlots); for (int i = 0; i < nSlots; i++) - AddUniqueName(info.SlotNames[i], allNames, nameIndices, nameUtf8Bytes); + AddUniqueName(info.SlotNames[i], ref nameIndices, ref nameUtf8Bytes); } else if (schema[col].HasSlotNames(nSlots)) { var romNames = default(VBuffer>); schema[col].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref romNames); - foreach (var kvp in romNames.Items(true)) - { - // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order. - var slotName = name + "." + - (!kvp.Value.IsEmpty ? 
kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture)); - AddUniqueName(slotName, allNames, nameIndices, nameUtf8Bytes); - } + AddUniqueName(name, romNames, ref nameIndices, ref nameUtf8Bytes); } else { for (int i = 0; i < nSlots; i++) - AddUniqueName(name + "." + i, allNames, nameIndices, nameUtf8Bytes); + AddUniqueName(name + "." + i, ref nameIndices, ref nameUtf8Bytes); } } else { nSlots = 1; - AddUniqueName(name, allNames, nameIndices, nameUtf8Bytes); + AddUniqueName(name, ref nameIndices, ref nameUtf8Bytes); } colIndices.Add(col); for (int i = 0; i < nSlots; i++) { - kindList.Add(kind); - keyCardList.Add(keyCard); + kindList.Append(kind); + keyCardList.Append(keyCard); + valueCounts.Add(valueCount); } } - ch.Assert(allNames.Count == kindList.Count); - ch.Assert(allNames.Count == keyCardList.Count); - ch.Assert(allNames.Count == nameIndices.Count); + ch.Assert(kindList.Length == keyCardList.Length); + ch.Assert(kindList.Length == nameIndices.Length); - var kinds = kindList.ToArray(); - var keyCards = keyCardList.ToArray(); - var nameBytes = nameUtf8Bytes.ToArray(); - var names = new byte*[allNames.Count]; + var kinds = kindList.AsSpan(); + var keyCards = keyCardList.AsSpan(); + var nameBytes = nameUtf8Bytes.AsSpan(); + var names = new byte*[nameIndices.Length]; + var valueCountsBytes = valueCounts.ToArray(); fixed (InternalDataKind* prgkind = kinds) fixed (byte* prgbNames = nameBytes) fixed (byte** prgname = names) fixed (int* prgkeyCard = keyCards) + fixed (byte* prgbValueCount = valueCountsBytes) { for (int iid = 0; iid < names.Length; iid++) names[iid] = prgbNames + nameIndices[iid]; DataViewBlock block; - block.ccol = allNames.Count; + block.ccol = nameIndices.Length; block.crow = view.GetRowCount() ?? 0; block.names = (sbyte**)prgname; block.kinds = prgkind; block.keyCards = prgkeyCard; + block.valueCounts = prgbValueCount; dataSink(penv, &block, out var setters, out var keyValueSetter); @@ -298,8 +297,13 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, keyIndex++; } } - fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], kinds[pyColumn], type, setters[pyColumn]); - pyColumn += type is VectorDataViewType ? type.GetVectorSize() : 1; + fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], prgkind[pyColumn], type, setters[pyColumn]); + + if ((type is VectorDataViewType) && (type.GetVectorSize() > 0)) + { + pyColumn += type.GetVectorSize(); + } + else pyColumn++; } for (int crow = 0; ; crow++) { @@ -317,23 +321,180 @@ private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, } } - private static string AddUniqueName(string name, HashSet allNames, List nameIndices, List nameUtf8Bytes) + private static unsafe void SendViewToNativeAsCsr(IChannel ch, EnvironmentBlock* penv, IDataView view) + { + Contracts.AssertValue(ch); + Contracts.Assert(penv != null); + Contracts.AssertValue(view); + if (penv->dataSink == null) + { + // Environment doesn't want any data! 
+ return; + } + + var dataSink = MarshalDelegate(penv->dataSink); + + var schema = view.Schema; + var colIndices = new List(); + var outputDataKind = InternalDataKind.R4; + + int numOutputRows = 0; + int numOutputCols = 0; + + for (int col = 0; col < schema.Count; col++) + { + if (schema[col].IsHidden) + continue; + + var fullType = schema[col].Type; + var itemType = fullType.GetItemType(); + int valueCount = fullType.GetValueCount(); + + if (valueCount == 0) + { + throw ch.ExceptNotSupp("Column has variable length vector: " + + schema[col].Name + ". Not supported in python. Drop column before sending to Python"); + } + + if (itemType.IsStandardScalar()) + { + switch (itemType.GetRawKind()) + { + default: + throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind()); + + case InternalDataKind.I1: + case InternalDataKind.I2: + case InternalDataKind.U1: + case InternalDataKind.U2: + case InternalDataKind.R4: + break; + + case InternalDataKind.I4: + case InternalDataKind.U4: + case InternalDataKind.I8: + case InternalDataKind.R8: + outputDataKind = InternalDataKind.R8; + break; + } + } + else + { + throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind()); + } + + colIndices.Add(col); + numOutputCols += valueCount; + } + + var nameIndices = new ValueListBuilder(10); + var nameUtf8Bytes = new ValueListBuilder(100); + + AddUniqueName("data", ref nameIndices, ref nameUtf8Bytes); + AddUniqueName("indices", ref nameIndices, ref nameUtf8Bytes); + AddUniqueName("indptr", ref nameIndices, ref nameUtf8Bytes); + AddUniqueName("shape", ref nameIndices, ref nameUtf8Bytes); + + var kindList = new List {outputDataKind, + InternalDataKind.I4, + InternalDataKind.I4, + InternalDataKind.I4}; + + var valueCounts = new List { 1, 1, 1, 1 }; + + var kinds = kindList.ToArray(); + var nameBytes = nameUtf8Bytes.AsSpan(); + var names = new byte*[nameIndices.Length]; + var valueCountsBytes = valueCounts.ToArray(); + + fixed (InternalDataKind* prgkind = kinds) + fixed (byte* prgbNames = nameBytes) + fixed (byte** prgname = names) + fixed (byte* prgbValueCount = valueCountsBytes) + { + for (int iid = 0; iid < names.Length; iid++) + names[iid] = prgbNames + nameIndices[iid]; + + DataViewBlock block; + block.ccol = nameIndices.Length; + block.crow = view.GetRowCount() ?? 
0; + block.names = (sbyte**)prgname; + block.kinds = prgkind; + block.keyCards = null; + block.valueCounts = prgbValueCount; + + dataSink(penv, &block, out var setters, out var keyValueSetter); + + if (setters == null) return; + + using (var cursor = view.GetRowCursor(view.Schema.Where(col => colIndices.Contains(col.Index)))) + { + CsrData csrData = new CsrData(penv, setters, outputDataKind); + var fillers = new CsrFillerBase[colIndices.Count]; + + for (int i = 0; i < colIndices.Count; i++) + { + var type = schema[colIndices[i]].Type; + fillers[i] = CsrFillerBase.Create(penv, cursor, colIndices[i], type, outputDataKind, csrData); + } + + for (;; numOutputRows++) + { + if (!cursor.MoveNext()) break; + + for (int i = 0; i < fillers.Length; i++) + { + fillers[i].Set(); + } + + csrData.IncrementRow(); + } + + csrData.SetShape(numOutputRows, numOutputCols); + } + } + } + + private static void AddUniqueName(string name, + ref ValueListBuilder nameIndices, + ref ValueListBuilder utf8Names) + { + if (utf8Names.Capacity - utf8Names.Length < name.Length * 2 + 2) + utf8Names.Grow(); + + nameIndices.Append(utf8Names.Length); + var bytesNumber = Encoding.UTF8.GetBytes(name, 0, name.Length, utf8Names.Buffer, utf8Names.Length); + utf8Names.Length += bytesNumber; + utf8Names.Append(0); + } + + private static void AddUniqueName( + string columnName, + VBuffer> slotNames, + ref ValueListBuilder nameIndices, + ref ValueListBuilder utf8Names) { - string newName = name; - int i = 1; - while (!allNames.Add(newName)) - newName = string.Format(CultureInfo.InvariantCulture, "{0}_{1}", name, i++); - // REVIEW: Column names should not be affected by the slot names. They should always win against slot names. - byte[] bNewName = Encoding.UTF8.GetBytes(newName); - nameIndices.Add(nameUtf8Bytes.Count); - nameUtf8Bytes.AddRange(bNewName); - nameUtf8Bytes.Add(0); - return newName; + var columnNameBytes = Encoding.UTF8.GetBytes(columnName); + var dotBytes = Encoding.UTF8.GetBytes("."); + + foreach (var kvp in slotNames.Items(true)) + { + // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order. + var slotName = (!kvp.Value.IsEmpty ? kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture)); + if (utf8Names.Capacity - utf8Names.Length < slotName.Length * 2 + columnNameBytes.Length + dotBytes.Length) + utf8Names.Grow(); + nameIndices.Append(utf8Names.Length); + utf8Names.AppendRange(columnNameBytes); + utf8Names.AppendRange(dotBytes); + var bytesNumber = Encoding.UTF8.GetBytes(slotName, 0, slotName.Length, utf8Names.Buffer, utf8Names.Length); + utf8Names.Length += bytesNumber; + utf8Names.Append(0); + } } private abstract unsafe class BufferFillerBase { - public delegate void ValuePoker(T value, int col, long index); + public delegate void ValuePoker(T value, int col, long m, long n); protected readonly int _colIndex; protected readonly DataViewRow _input; @@ -357,23 +518,23 @@ public static BufferFillerBase Create(EnvironmentBlock* penv, DataViewRow input, case InternalDataKind.U1: var fnI1 = MarshalDelegate(setter); ValuePoker pokeU1 = - (byte value, int col, long index) => fnI1(penv, col, index, value > keyMax ? (sbyte)-1 : (sbyte)(value - 1)); + (byte value, int col, long m, long n) => fnI1(penv, col, m, n, value > keyMax ? (sbyte)-1 : (sbyte)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU1); case InternalDataKind.U2: var fnI2 = MarshalDelegate(setter); ValuePoker pokeU2 = - (ushort value, int col, long index) => fnI2(penv, col, index, value > keyMax ? 
(short)-1 : (short)(value - 1)); + (ushort value, int col, long m, long n) => fnI2(penv, col, m, n, value > keyMax ? (short)-1 : (short)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU2); case InternalDataKind.U4: var fnI4 = MarshalDelegate(setter); ValuePoker pokeU4 = - (uint value, int col, long index) => fnI4(penv, col, index, value > keyMax ? -1 : (int)(value - 1)); + (uint value, int col, long m, long n) => fnI4(penv, col, m, n, value > keyMax ? -1 : (int)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU4); case InternalDataKind.U8: // We convert U8 key types with key names to I4. fnI4 = MarshalDelegate(setter); ValuePoker pokeU8 = - (ulong value, int col, long index) => fnI4(penv, col, index, value > keyMax ? -1 : (int)(value - 1)); + (ulong value, int col, long m, long n) => fnI4(penv, col, m, n, value > keyMax ? -1 : (int)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU8); } } @@ -385,23 +546,23 @@ public static BufferFillerBase Create(EnvironmentBlock* penv, DataViewRow input, case InternalDataKind.U1: var fnI1 = MarshalDelegate(setter); ValuePoker pokeU1 = - (byte value, int col, long index) => fnI1(penv, col, index, (sbyte)(value - 1)); + (byte value, int col, long m, long n) => fnI1(penv, col, m, n, (sbyte)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU1); case InternalDataKind.U2: var fnI2 = MarshalDelegate(setter); ValuePoker pokeU2 = - (ushort value, int col, long index) => fnI2(penv, col, index, (short)(value - 1)); + (ushort value, int col, long m, long n) => fnI2(penv, col, m, n, (short)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU2); case InternalDataKind.U4: var fnI4 = MarshalDelegate(setter); ValuePoker pokeU4 = - (uint value, int col, long index) => fnI4(penv, col, index, (int)(value - 1)); + (uint value, int col, long m, long n) => fnI4(penv, col, m, n, (int)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU4); case InternalDataKind.U8: // We convert U8 key types with key names to I4. fnI4 = MarshalDelegate(setter); ValuePoker pokeU8 = - (ulong value, int col, long index) => fnI4(penv, col, index, (int)(value - 1)); + (ulong value, int col, long m, long n) => fnI4(penv, col, m, n, (int)(value - 1)); return new Impl(input, pyCol, idvCol, type, pokeU8); } } @@ -412,70 +573,81 @@ public static BufferFillerBase Create(EnvironmentBlock* penv, DataViewRow input, case InternalDataKind.R4: var fnR4 = MarshalDelegate(setter); ValuePoker pokeR4 = - (float value, int col, long index) => fnR4(penv, col, index, value); + (float value, int col, long m, long n) => fnR4(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeR4); case InternalDataKind.R8: var fnR8 = MarshalDelegate(setter); ValuePoker pokeR8 = - (double value, int col, long index) => fnR8(penv, col, index, value); + (double value, int col, long m, long n) => fnR8(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeR8); case InternalDataKind.BL: var fnBl = MarshalDelegate(setter); ValuePoker pokeBl = - (bool value, int col, long index) => fnBl(penv, col, index, !value ? (byte)0 : value ? (byte)1 : (byte)0xFF); + (bool value, int col, long m, long n) => fnBl(penv, col, m, n, !value ? (byte)0 : value ? 
(byte)1 : (byte)0xFF); return new Impl(input, pyCol, idvCol, type, pokeBl); case InternalDataKind.I1: var fnI1 = MarshalDelegate(setter); ValuePoker pokeI1 = - (sbyte value, int col, long index) => fnI1(penv, col, index, value); + (sbyte value, int col, long m, long n) => fnI1(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeI1); case InternalDataKind.I2: var fnI2 = MarshalDelegate(setter); ValuePoker pokeI2 = - (short value, int col, long index) => fnI2(penv, col, index, value); + (short value, int col, long m, long n) => fnI2(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeI2); case InternalDataKind.I4: var fnI4 = MarshalDelegate(setter); ValuePoker pokeI4 = - (int value, int col, long index) => fnI4(penv, col, index, value); + (int value, int col, long m, long n) => fnI4(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeI4); case InternalDataKind.I8: var fnI8 = MarshalDelegate(setter); ValuePoker pokeI8 = - (long value, int col, long index) => fnI8(penv, col, index, value); + (long value, int col, long m, long n) => fnI8(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeI8); case InternalDataKind.U1: var fnU1 = MarshalDelegate(setter); ValuePoker pokeU1 = - (byte value, int col, long index) => fnU1(penv, col, index, value); + (byte value, int col, long m, long n) => fnU1(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeU1); case InternalDataKind.U2: var fnU2 = MarshalDelegate(setter); ValuePoker pokeU2 = - (ushort value, int col, long index) => fnU2(penv, col, index, value); + (ushort value, int col, long m, long n) => fnU2(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeU2); case InternalDataKind.U4: var fnU4 = MarshalDelegate(setter); ValuePoker pokeU4 = - (uint value, int col, long index) => fnU4(penv, col, index, value); + (uint value, int col, long m, long n) => fnU4(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeU4); case InternalDataKind.U8: var fnU8 = MarshalDelegate(setter); ValuePoker pokeU8 = - (ulong value, int col, long index) => fnU8(penv, col, index, value); + (ulong value, int col, long m, long n) => fnU8(penv, col, m, n, value); return new Impl(input, pyCol, idvCol, type, pokeU8); + case InternalDataKind.DT: + var fnDT = MarshalDelegate(setter); + ValuePoker pokeDT = + (DateTime value, int col, long m, long n) => + { + DateTimeOffset dto = (value.Kind == DateTimeKind.Unspecified) ? 
+ new DateTimeOffset(value, TimeSpan.Zero) : + new DateTimeOffset(value); + fnDT(penv, col, m, n, dto.ToUnixTimeMilliseconds()); + }; + return new Impl(input, pyCol, idvCol, type, pokeDT); case InternalDataKind.TX: var fnTX = MarshalDelegate(setter); ValuePoker> pokeTX = - (ReadOnlyMemory value, int col, long index) => + (ReadOnlyMemory value, int col, long m, long n) => { if (value.IsEmpty) - fnTX(penv, col, index, null, 0); + fnTX(penv, col, m, n, null, 0); else { byte[] bt = Encoding.UTF8.GetBytes(value.ToString()); fixed (byte* pt = bt) - fnTX(penv, col, index, (sbyte*)pt, bt.Length); + fnTX(penv, col, m, n, (sbyte*)pt, bt.Length); } }; return new Impl>(input, pyCol, idvCol, type, pokeTX); @@ -496,6 +668,7 @@ private sealed class Impl : BufferFillerBase private VBuffer _buffer; private readonly ValueGetter _get; private readonly ValuePoker _poker; + private readonly bool _isVarLength; public Impl(DataViewRow input, int pyColIndex, int idvColIndex, DataViewType type, ValuePoker poker) : base(input, pyColIndex) @@ -509,6 +682,7 @@ public Impl(DataViewRow input, int pyColIndex, int idvColIndex, DataViewType typ _get = RowCursorUtils.GetGetterAs(type, input, idvColIndex); _poker = poker; + _isVarLength = (type.GetValueCount() == 0); } public override void Set() { @@ -519,7 +693,9 @@ public override void Set() { for (int i = 0; i < _buffer.Length; i++) { - _poker(_buffer.GetValues()[i], _colIndex + i, _input.Position); + if (_isVarLength) + _poker(_buffer.GetValues()[i], _colIndex, _input.Position, i); + else _poker(_buffer.GetValues()[i], _colIndex + i, _input.Position, 0); } } else @@ -534,7 +710,10 @@ public override void Set() TSrc val = default(TSrc); if (ii < values.Length && indices[ii] == i) val = values[ii]; - _poker(val, _colIndex + i, _input.Position); + + if (_isVarLength) + _poker(val, _colIndex, _input.Position, i); + else _poker(val, _colIndex + i, _input.Position, 0); } } } @@ -542,7 +721,239 @@ public override void Set() { TSrc value = default(TSrc); _get(ref value); - _poker(value, _colIndex, _input.Position); + _poker(value, _colIndex, _input.Position, 0); + } + } + } + } + + private unsafe class CsrData + { + private const int DataCol = 0; + private const int IndicesCol = 1; + private const int IndPtrCol = 2; + private const int ShapeCol = 3; + + private readonly R4Setter _r4DataSetter; + private readonly R8Setter _r8DataSetter; + private readonly I4Setter _indicesSetter; + private readonly I4Setter _indptrSetter; + private readonly I4Setter _shapeSetter; + + public int col; + + private int _row; + private int _index; + + private EnvironmentBlock* _penv; + + public CsrData(EnvironmentBlock* penv, void** setters, InternalDataKind outputDataKind) + { + col = 0; + + _row = 0; + _index = 0; + _penv = penv; + + if (outputDataKind == InternalDataKind.R4) + { + _r4DataSetter = MarshalDelegate(setters[DataCol]); + _r8DataSetter = null; + } + else if(outputDataKind == InternalDataKind.R8) + { + _r4DataSetter = null; + _r8DataSetter = MarshalDelegate(setters[DataCol]); + } + + _indicesSetter = MarshalDelegate(setters[IndicesCol]); + _indptrSetter = MarshalDelegate(setters[IndPtrCol]); + _shapeSetter = MarshalDelegate(setters[ShapeCol]); + + _indptrSetter(_penv, IndPtrCol, 0, 0, 0); + } + + public void AppendR4(float value, int col) + { + _r4DataSetter(_penv, DataCol, _index, 0, value); + _indicesSetter(_penv, IndicesCol, _index, 0, col); + _index++; + } + + public void AppendR8(double value, int col) + { + _r8DataSetter(_penv, DataCol, _index, 0, value); + _indicesSetter(_penv, 
IndicesCol, _index, 0, col); + _index++; + } + + public void IncrementRow() + { + col = 0; + _row++; + + _indptrSetter(_penv, IndPtrCol, _row, 0, _index); + } + + public void SetShape(int m, int n) + { + _shapeSetter(_penv, ShapeCol, 0, 0, m); + _shapeSetter(_penv, ShapeCol, 1, 0, n); + } + } + + private abstract unsafe class CsrFillerBase + { + public delegate void DataAppender(T value, int col); + + protected CsrFillerBase() {} + + public static CsrFillerBase Create(EnvironmentBlock* penv, + DataViewRow input, + int idvCol, + DataViewType idvColType, + InternalDataKind outputDataKind, + CsrData csrData) + { + if (outputDataKind == InternalDataKind.R4) + { + switch (idvColType.GetItemType().GetRawKind()) + { + case InternalDataKind.I1: + DataAppender appendI1 = (sbyte val, int i) => csrData.AppendR4((float)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI1, csrData); + case InternalDataKind.I2: + DataAppender appendI2 = (short val, int i) => csrData.AppendR4((float)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI2, csrData); + case InternalDataKind.U1: + DataAppender appendU1 = (byte val, int i) => csrData.AppendR4((float)val, i); + return new CsrFiller(input, idvCol, idvColType, appendU1, csrData); + case InternalDataKind.U2: + DataAppender appendU2 = (ushort val, int i) => csrData.AppendR4((float)val, i); + return new CsrFiller(input, idvCol, idvColType, appendU2, csrData); + case InternalDataKind.R4: + DataAppender appendR4 = (float val, int i) => csrData.AppendR4((float)val, i); + return new CsrFiller(input, idvCol, idvColType, appendR4, csrData); + default: + throw Contracts.Except("Source data type not supported"); + } + } + else if (outputDataKind == InternalDataKind.R8) + { + switch (idvColType.GetItemType().GetRawKind()) + { + case InternalDataKind.I1: + DataAppender appendI1 = (sbyte val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI1, csrData); + case InternalDataKind.I2: + DataAppender appendI2 = (short val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI2, csrData); + case InternalDataKind.I4: + DataAppender appendI4 = (int val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI4, csrData); + case InternalDataKind.U1: + DataAppender appendU1 = (byte val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendU1, csrData); + case InternalDataKind.U2: + DataAppender appendU2 = (ushort val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendU2, csrData); + case InternalDataKind.U4: + DataAppender appendU4 = (uint val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendU4, csrData); + case InternalDataKind.R4: + DataAppender appendR4 = (float val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendR4, csrData); + case InternalDataKind.I8: + DataAppender appendI8 = (long val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendI8, csrData); + case InternalDataKind.R8: + DataAppender appendR8 = (double val, int i) => csrData.AppendR8((double)val, i); + return new CsrFiller(input, idvCol, idvColType, appendR8, csrData); + default: + throw Contracts.Except("Source data type not supported"); + } + } + + throw Contracts.Except("Target data type not 
supported."); + } + + public abstract void Set(); + + private sealed class CsrFiller : CsrFillerBase + { + private readonly ValueGetter> _getVec; + private readonly ValueGetter _get; + private VBuffer _buffer; + + private CsrData _csrData; + private readonly DataAppender _dataAppender; + + private readonly IEqualityComparer comparer = EqualityComparer.Default; + + public CsrFiller(DataViewRow input, + int idvColIndex, + DataViewType type, + DataAppender dataAppender, + CsrData csrData) + : base() + { + Contracts.AssertValue(input); + Contracts.Assert(0 <= idvColIndex && idvColIndex < input.Schema.Count); + + if (type is VectorDataViewType) + _getVec = RowCursorUtils.GetVecGetterAs((PrimitiveDataViewType)type.GetItemType(), input, idvColIndex); + else + _get = RowCursorUtils.GetGetterAs(type, input, idvColIndex); + + _csrData = csrData; + _dataAppender = dataAppender; + } + + public bool IsDefault(TSrc t) + { + return comparer.Equals(t, default(TSrc)); + } + + public override void Set() + { + if (_getVec != null) + { + _getVec(ref _buffer); + if (_buffer.IsDense) + { + var values = _buffer.GetValues(); + + for (int i = 0; i < values.Length; i++) + { + if (!IsDefault(values[i])) + _dataAppender(values[i], _csrData.col); + + _csrData.col++; + } + } + else + { + var values = _buffer.GetValues(); + var indices = _buffer.GetIndices(); + + for (int i = 0; i < values.Length; i++) + { + if (!IsDefault(values[i])) + _dataAppender(values[i], _csrData.col + indices[i]); + } + + _csrData.col += _buffer.Length; + } + } + else + { + TSrc value = default(TSrc); + _get(ref value); + + if (!IsDefault(value)) + _dataAppender(value, _csrData.col); + + _csrData.col++; } } } diff --git a/src/DotNetBridge/NativeDataView.cs b/src/DotNetBridge/NativeDataView.cs index 09796203..1a829889 100644 --- a/src/DotNetBridge/NativeDataView.cs +++ b/src/DotNetBridge/NativeDataView.cs @@ -8,13 +8,12 @@ using System.Collections.Concurrent; using System.Linq; using System.Threading; -using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using System.Threading.Tasks; using Microsoft.ML.Runtime; -namespace Microsoft.MachineLearning.DotNetBridge +namespace Microsoft.ML.DotNetBridge { public unsafe static partial class Bridge { @@ -143,6 +142,10 @@ public NativeDataView(IHostEnvironment env, DataSourceBlock* pdata) case InternalDataKind.Text: columns.Add(new TextColumn(pdata, pdata->getters[c], c, name)); break; + case InternalDataKind.DT: + if (pdata->vecCards[c] == -1) + columns.Add(new DateTimeColumn(pdata, pdata->getters[c], c, name)); + break; } } @@ -867,6 +870,31 @@ public override void Dispose() } } + private sealed class DateTimeColumn : Column + { + private I8Getter _getter; + + public DateTimeColumn(DataSourceBlock* data, void* getter, int colIndex, string name) + : base(data, colIndex, name, DateTimeDataViewType.Instance) + { + _getter = MarshalDelegate(getter); + } + + public override void CopyOut(long index, Batch batch, ref DateTime value) + { + Contracts.Check(Data != null, AlreadyDisposed); + Contracts.Assert(0 <= index); + _getter(Data, ColIndex, index, out var val); + value = DateTimeOffset.FromUnixTimeMilliseconds(val).UtcDateTime; + } + + public override void Dispose() + { + _getter = null; + base.Dispose(); + } + } + private sealed class TextColumn : Column> { private TXGetter _getter; diff --git a/src/DotNetBridge/RmlEnvironment.cs b/src/DotNetBridge/RmlEnvironment.cs index d2e861fe..1bcc0f50 100644 --- a/src/DotNetBridge/RmlEnvironment.cs +++ b/src/DotNetBridge/RmlEnvironment.cs 
@@ -8,7 +8,7 @@ using Microsoft.ML; using Microsoft.ML.Runtime; -namespace Microsoft.MachineLearning.DotNetBridge +namespace Microsoft.ML.DotNetBridge { internal class RmlEnvironment : HostEnvironmentBase { @@ -55,7 +55,6 @@ protected override IHost RegisterCore(HostEnvironmentBase source public RmlEnvironment(Bridge.CheckCancelled checkDelegate, int? seed = null, bool verbose = false) : this(RandomUtils.Create(seed), verbose) { - CheckCancelled = checkDelegate; } diff --git a/src/DotNetBridge/RunGraph.cs b/src/DotNetBridge/RunGraph.cs index c9d668fa..55f02795 100644 --- a/src/DotNetBridge/RunGraph.cs +++ b/src/DotNetBridge/RunGraph.cs @@ -9,8 +9,6 @@ using System.IO; using System.Linq; using Microsoft.DataPrep.Common; -using Microsoft.ML; -using Microsoft.ML.CommandLine; using Microsoft.ML.Data; using Microsoft.ML.Data.IO; using Microsoft.ML.EntryPoints; @@ -20,36 +18,15 @@ using Newtonsoft.Json; using Newtonsoft.Json.Linq; -namespace Microsoft.MachineLearning.DotNetBridge +namespace Microsoft.ML.DotNetBridge { public unsafe static partial class Bridge { // std:null specifier in a graph, used to redirect output to std::null const string STDNULL = ""; - private sealed class RunGraphArgs - { -#pragma warning disable 649 // never assigned - [Argument(ArgumentType.AtMostOnce)] - public string graph; - - [Argument(ArgumentType.LastOccurenceWins, HelpText = "Desired degree of parallelism in the data pipeline", ShortName = "conc")] - public int? parallel; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Random seed", ShortName = "seed")] - public int? randomSeed; - - [Argument(ArgumentType.AtMostOnce, ShortName = "lab")] - public string labelColumn; //not used - - [Argument(ArgumentType.Multiple, ShortName = "feat")] - public string[] featureColumn; //not used - - [Argument(ArgumentType.AtMostOnce, HelpText = "Max slots to return for vector valued columns (<=0 to return all)")] - public int maxSlots = -1; - -#pragma warning restore 649 // never assigned - } + // graph output format specifier, used to output to a sparse csr matrix + const string CSR_MATRIX = ""; private static void SaveIdvToFile(IDataView idv, string path, IHost host) { @@ -58,7 +35,15 @@ private static void SaveIdvToFile(IDataView idv, string path, IHost host) var extension = Path.GetExtension(path); IDataSaver saver; if (extension != ".csv" && extension != ".tsv" && extension != ".txt") + { saver = new BinarySaver(host, new BinarySaver.Arguments()); + + var schemaFilePath = Path.GetDirectoryName(path) + + Path.DirectorySeparatorChar + + Path.GetFileNameWithoutExtension(path) + + ".schema"; + SaveIdvSchemaToFile(idv, schemaFilePath, host); + } else { var saverArgs = new TextSaver.Arguments @@ -80,6 +65,25 @@ private static void SaveIdvToFile(IDataView idv, string path, IHost host) } } + private static void SaveIdvSchemaToFile(IDataView idv, string path, IHost host) + { + var emptyDataView = new EmptyDataView(host, idv.Schema); + var saverArgs = new TextSaver.Arguments + { + OutputHeader = false, + OutputSchema = true, + Dense = true + }; + IDataSaver saver = new TextSaver(host, saverArgs); + + using (var fs = File.OpenWrite(path)) + { + saver.SaveData(fs, emptyDataView, Utils.GetIdentityPermutation(emptyDataView.Schema.Count) + .Where(x => !emptyDataView.Schema[x].IsHidden && saver.IsColumnSavable(emptyDataView.Schema[x].Type)) + .ToArray()); + } + } + private static void SavePredictorModelToFile(PredictorModel model, string path, IHost host) { using (var fs = File.OpenWrite(path)) @@ -90,19 +94,11 @@ private static void 
RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s { Contracts.AssertValue(env); - var args = new RunGraphArgs(); - string err = null; - if (!CmdParser.ParseArguments(env, graphStr, args, e => err = err ?? e)) - throw env.Except(err); - - int? maxThreadsAllowed = Math.Min(args.parallel > 0 ? args.parallel.Value : penv->maxThreadsAllowed, penv->maxThreadsAllowed); - maxThreadsAllowed = penv->maxThreadsAllowed > 0 ? maxThreadsAllowed : args.parallel; - var host = env.Register("RunGraph", args.randomSeed, null); - + var host = env.Register("RunGraph", penv->seed, null); JObject graph; try { - graph = JObject.Parse(args.graph); + graph = JObject.Parse(graphStr); } catch (JsonReaderException ex) { @@ -148,10 +144,7 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s if (extension == ".txt") dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path)); else if (extension == ".dprep") - { - DPrepSettings.Instance.PythonPath = BytesToString(penv->pythonPath); - dv = DataFlow.FromDPrepFile(path).ToDataView(); - } + dv = LoadDprepFile(BytesToString(penv->pythonPath), path); else dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path); } @@ -218,14 +211,18 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s throw host.ExceptNotSupp("File handle outputs not yet supported."); case TlcModule.DataKind.DataView: var idv = runner.GetOutput(varName); - if (!string.IsNullOrWhiteSpace(path)) + if (path == CSR_MATRIX) + { + SendViewToNativeAsCsr(ch, penv, idv); + } + else if (!string.IsNullOrWhiteSpace(path)) { SaveIdvToFile(idv, path, host); } else { - var infos = ProcessColumns(ref idv, args.maxSlots, host); - SendViewToNative(ch, penv, idv, infos); + var infos = ProcessColumns(ref idv, penv->maxSlots, host); + SendViewToNativeAsDataFrame(ch, penv, idv, infos); } break; case TlcModule.DataKind.PredictorModel: @@ -286,6 +283,12 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s } } + private static IDataView LoadDprepFile(string pythonPath, string path) + { + DPrepSettings.Instance.PythonPath = pythonPath; + return DataFlow.FromDPrepFile(path).ToDataView(); + } + private static Dictionary ProcessColumns(ref IDataView view, int maxSlots, IHostEnvironment env) { Dictionary result = null; diff --git a/src/DotNetBridge/ValueListBuilder.cs b/src/DotNetBridge/ValueListBuilder.cs new file mode 100644 index 00000000..418cd673 --- /dev/null +++ b/src/DotNetBridge/ValueListBuilder.cs @@ -0,0 +1,118 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
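A usage sketch for the pooled `ValueListBuilder<T>` defined below (illustrative only and not part of this patch; the type is an internal ref struct, so calling it like this from outside the assembly is hypothetical):

```csharp
using System;

static class ValueListBuilderSketch
{
    static void Main()
    {
        // Start on a small stack buffer; the builder switches to a rented
        // ArrayPool<int> array once more than 16 items are appended.
        Span<int> scratch = stackalloc int[16];
        var builder = new ValueListBuilder<int>(scratch);

        for (int i = 0; i < 100; i++)
            builder.Append(i);

        ReadOnlySpan<int> view = builder.AsSpan();
        Console.WriteLine(view.Length);   // 100

        builder.Dispose();                // returns the rented array to ArrayPool<int>.Shared
    }
}
```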
+ +using System; +using System.Buffers; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace Microsoft.ML.DotNetBridge +{ + internal ref struct ValueListBuilder + { + private Span _span; + private T[] _arrayFromPool; + private int _pos; + + public ValueListBuilder(Span initialSpan) + { + _span = initialSpan; + _arrayFromPool = null; + _pos = 0; + } + + public ValueListBuilder(int initialSize = 1024) + { + _arrayFromPool = ArrayPool.Shared.Rent(initialSize); + _span = _arrayFromPool; + _pos = 0; + } + + public int Length + { + get => _pos; + set + { + Debug.Assert(value >= 0); + Debug.Assert(value <= _span.Length); + _pos = value; + } + } + + public int Capacity + { + get => _span.Length; + } + + public ref T this[int index] + { + get + { + Debug.Assert(index < _pos); + return ref _span[index]; + } + } + + public T[] Buffer + { + get => _arrayFromPool; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Append(T item) + { + int pos = _pos; + if (pos >= _span.Length) + Grow(); + + _span[pos] = item; + _pos = pos + 1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AppendRange(T[] items) + { + int pos = _pos; + while(pos + items.Length >= _span.Length) + Grow(); + + foreach (T item in items) + { + _span[pos] = item; + _pos = pos + 1; + pos++; + } + } + + public ReadOnlySpan AsSpan() + { + return _span.Slice(0, _pos); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Dispose() + { + if (_arrayFromPool != null) + { + ArrayPool.Shared.Return(_arrayFromPool); + _arrayFromPool = null; + } + } + + public void Grow() + { + T[] array = ArrayPool.Shared.Rent(_span.Length * 2); + + bool success = _span.TryCopyTo(array); + Debug.Assert(success); + + T[] toReturn = _arrayFromPool; + _span = _arrayFromPool = array; + if (toReturn != null) + { + ArrayPool.Shared.Return(toReturn); + } + } + } +} \ No newline at end of file diff --git a/src/DotNetBridge/transforms/VariableColumnTransform.cs b/src/DotNetBridge/transforms/VariableColumnTransform.cs new file mode 100644 index 00000000..ea9ecafb --- /dev/null +++ b/src/DotNetBridge/transforms/VariableColumnTransform.cs @@ -0,0 +1,337 @@ +//------------------------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//------------------------------------------------------------------------------ + +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms; +using Microsoft.ML.Internal.Utilities; + + +namespace Microsoft.ML.DotNetBridge +{ + using BitArray = System.Collections.BitArray; + + /// + /// A transform that combines the specified input columns + /// in to a single variable length vectorized column and + /// passes the rest of the columns through unchanged. 
+ /// + [BestFriend] + internal sealed class VariableColumnTransform : IDataTransform, IRowToRowMapper + { + public class Options : TransformInputBase + { + [Argument(ArgumentType.Multiple, HelpText = "Features", SortOrder = 2)] + public string[] Features; + + [Argument(ArgumentType.Multiple, HelpText = "Length Column Name", SortOrder = 2)] + public string LengthColumnName; + } + + private sealed class Bindings + { + public readonly List outputToInputMap; + public readonly List vectorToInputMap; + public int outputColumn; + public int lengthColumn; + + public Bindings() + { + outputToInputMap = new List(); + vectorToInputMap = new List(); + outputColumn = -1; + lengthColumn = -1; + } + } + + private readonly IHost _host; + private readonly Bindings _bindings; + private readonly HashSet _columnNames; + + public IDataView Source { get; } + + DataViewSchema IRowToRowMapper.InputSchema => Source.Schema; + + private VariableColumnTransform(IHostEnvironment env, IDataView input, string[] features, string lengthColumnName) + { + Contracts.CheckValue(env, nameof(env)); + + Source = input; + _host = env.Register(RegistrationName); + _bindings = new Bindings(); + + _columnNames = (features == null) ? new HashSet() : + new HashSet(features); + + OutputSchema = ProcessInputSchema(input.Schema, lengthColumnName); + } + + internal const string Summary = "Combines the specified input columns in to a single variable length vectorized column."; + + public const string LoaderSignature = "VariableColumnTransform"; + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "VARLENCL", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(VariableColumnTransform).Assembly.FullName); + } + + internal static string RegistrationName = "VariableColumnTransform"; + + public static VariableColumnTransform Create(IHostEnvironment env, Options options, IDataView input) + { + return new VariableColumnTransform(env, input, options.Features, options.LengthColumnName); + } + + public static VariableColumnTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + var h = env.Register(RegistrationName); + h.CheckValue(ctx, nameof(ctx)); + h.CheckValue(input, nameof(input)); + ctx.CheckAtModel(GetVersionInfo()); + return h.Apply("Loading Model", ch => new VariableColumnTransform(h, ctx, input)); + } + + private VariableColumnTransform(IHost host, ModelLoadContext ctx, IDataView input) + { + Contracts.AssertValue(host, nameof(host)); + host.CheckValue(input, nameof(input)); + + Source = input; + _host = host; + + // TODO: fill this in + } + + void ICanSaveModel.Save(ModelSaveContext ctx) + { + _host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + // TODO: fill this in + } + + public bool CanShuffle => Source.CanShuffle; + + DataViewSchema IDataView.Schema => OutputSchema; + public DataViewSchema OutputSchema { get; } + + private DataViewSchema ProcessInputSchema(DataViewSchema inputSchema, string lengthColumnName) + { + var builder = new DataViewSchema.Builder(); + for (int i = 0; i < inputSchema.Count; i++) + { + var name = inputSchema[i].Name; + + if (_columnNames.Contains(name)) + { + _bindings.vectorToInputMap.Add(i); + } + else if (name == lengthColumnName) + { + _bindings.lengthColumn = i; + } + else + { + builder.AddColumn(name, inputSchema[i].Type); + 
_bindings.outputToInputMap.Add(i); + } + } + + if (_bindings.vectorToInputMap.Count > 0) + { + var type = inputSchema[_bindings.vectorToInputMap[0]].Type as PrimitiveDataViewType; + + for (int i = 1; i < _bindings.vectorToInputMap.Count; i++) + { + var nextType = inputSchema[_bindings.vectorToInputMap[i]].Type as PrimitiveDataViewType; + if (!nextType.Equals(type)) + { + throw Contracts.Except("Input data types of the columns to vectorize must " + + "all be of the same type. Found {0} and {1}.", + type.ToString(), + nextType.ToString()); + } + } + + var outputColumnType = new VectorDataViewType(type, 0); + var outputColumnName = inputSchema[_bindings.vectorToInputMap[0]].Name; + builder.AddColumn(outputColumnName, outputColumnType); + + _bindings.outputColumn = _bindings.outputToInputMap.Count; + } + + return builder.ToSchema(); + } + + public long? GetRowCount() + { + return Source.GetRowCount(); + } + + public DataViewRowCursor GetRowCursor(IEnumerable columnsNeeded, Random rand = null) + { + var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema); + + _host.CheckValueOrNull(rand); + return new Cursor(_host, this, _bindings, predicate, rand); + } + + public DataViewRowCursor[] GetRowCursorSet(IEnumerable columnsNeeded, int n, Random rand = null) + { + var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, OutputSchema); + + _host.CheckValueOrNull(rand); + return new DataViewRowCursor[] { new Cursor(_host, this, _bindings, predicate, rand) }; + } + + private sealed class Cursor : RootCursorBase + { + private readonly IDataTransform _view; + private readonly BitArray _active; + private readonly Bindings _bindings; + private readonly DataViewRowCursor _cursor; + + public override DataViewSchema Schema => _view.Schema; + + public override long Batch + { + get { return 0; } + } + + public Cursor(IChannelProvider provider, IDataTransform view, Bindings bindings, Func predicate, Random rand) + : base(provider) + { + Ch.AssertValue(view); + Ch.AssertValueOrNull(rand); + Ch.Assert(view.Schema.Count >= 0); + + _view = view; + _bindings = bindings; + _cursor = view.Source.GetRowCursorForAllColumns(); + _active = new BitArray(view.Schema.Count); + + if (predicate == null) _active.SetAll(true); + else + { + for (int i = 0; i < view.Schema.Count; ++i) + _active[i] = predicate(i); + } + } + + public override ValueGetter GetIdGetter() + { + return (ref DataViewRowId val) => + { + Ch.Check(IsGood, RowCursorUtils.FetchValueStateError); + val = new DataViewRowId((ulong)Position, 0); + }; + } + + public override bool IsColumnActive(DataViewSchema.Column column) + { + Ch.Check(column.Index < Schema.Count); + return _active[column.Index]; + } + + private Delegate MakeVarLengthVectorGetter(DataViewRow input) + { + var srcGetters = new ValueGetter[_bindings.vectorToInputMap.Count]; + ValueGetter lengthGetter = null; + + for (int i = 0; i < _bindings.vectorToInputMap.Count; i++) + { + var column = input.Schema[_bindings.vectorToInputMap[i]]; + srcGetters[i] = input.GetGetter(column); + } + + if (_bindings.lengthColumn >= 0) + { + var column = input.Schema[_bindings.lengthColumn]; + lengthGetter = input.GetGetter(column); + } + + T tmp = default(T); + ValueGetter> result = (ref VBuffer dst) => + { + int length = _bindings.vectorToInputMap.Count; + if (lengthGetter != null) + { + long expectedLength = length; + lengthGetter(ref expectedLength); + + if ((expectedLength >= 0) && (expectedLength < length)) + { + length = (int)expectedLength; + } + } + + var editor = 
VBufferEditor.Create(ref dst, length); + + for (int i = 0; i < length; i++) + { + srcGetters[i](ref tmp); + editor.Values[i] = tmp; + } + + dst = editor.Commit(); + }; + return result; + } + + /// + /// Returns a value getter delegate to fetch the value of column with the given columnIndex, from the row. + /// This throws if the column is not active in this row, or if the type + /// differs from this column's type. + /// + /// is the column's content type. + /// is the output column whose getter should be returned. + public override ValueGetter GetGetter(DataViewSchema.Column column) + { + if (column.Index == _bindings.outputColumn) + { + VectorDataViewType columnType = column.Type as VectorDataViewType; + Delegate getter = Utils.MarshalInvoke(MakeVarLengthVectorGetter, columnType.ItemType.RawType, _cursor); + return getter as ValueGetter; + } + else + { + int inputIndex = _bindings.outputToInputMap[column.Index]; + return _cursor.GetGetter(_cursor.Schema[inputIndex]); + } + } + + protected override bool MoveNextCore() + { + return _cursor.MoveNext(); + } + } + + /// + /// Given a set of columns, return the input columns that are needed to generate those output columns. + /// + IEnumerable IRowToRowMapper.GetDependencies(IEnumerable dependingColumns) + => dependingColumns; + + DataViewRow IRowToRowMapper.GetRow(DataViewRow input, IEnumerable activeColumns) + { + Contracts.CheckValue(input, nameof(input)); + Contracts.CheckValue(activeColumns, nameof(activeColumns)); + Contracts.CheckParam(input.Schema == Source.Schema, nameof(input), "Schema of input row must be the same as the schema the mapper is bound to"); + return input; + } + } +} \ No newline at end of file diff --git a/src/NativeBridge/DataViewInterop.cpp b/src/NativeBridge/DataViewInterop.cpp index dd349012..d681df1f 100644 --- a/src/NativeBridge/DataViewInterop.cpp +++ b/src/NativeBridge/DataViewInterop.cpp @@ -7,317 +7,318 @@ DataSourceBlock::DataSourceBlock(bp::dict& data) { - // Assert that this class doesn't have a vtable. - assert(offsetof(DataSourceBlock, ccol) == 0); + // Assert that this class doesn't have a vtable. 
+ assert(offsetof(DataSourceBlock, ccol) == 0); - CxInt64 llTotalNumRows = -1; - assert(data.contains(PYTHON_DATA_KEY_INFO)); - bp::dict varInfo = bp::extract(data[PYTHON_DATA_KEY_INFO]); + CxInt64 llTotalNumRows = -1; + assert(data.contains(PYTHON_DATA_KEY_INFO)); + bp::dict varInfo = bp::extract(data[PYTHON_DATA_KEY_INFO]); - assert(data.contains(PYTHON_DATA_COL_TYPES)); - bp::list colTypes = bp::extract(data[PYTHON_DATA_COL_TYPES]); + assert(data.contains(PYTHON_DATA_COL_TYPES)); + bp::list colTypes = bp::extract(data[PYTHON_DATA_COL_TYPES]); - bp::stl_input_iterator keys(data.keys()), end1; - bp::stl_input_iterator values(data.values()); - CxInt64 dataframeColCount = -1; - for (; keys != end1; keys++) - { - bp::object key = *keys; - char* name = bp::extract(key); - bp::object value = *values++; - if (strcmp(name, PYTHON_DATA_KEY_INFO) == 0 || strcmp(name, PYTHON_DATA_COL_TYPES) == 0) - continue; + bp::stl_input_iterator keys(data.keys()), end1; + bp::stl_input_iterator values(data.values()); + CxInt64 dataframeColCount = -1; + for (; keys != end1; keys++) + { + bp::object key = *keys; + char* name = bp::extract(key); + bp::object value = *values++; + if (strcmp(name, PYTHON_DATA_KEY_INFO) == 0 || strcmp(name, PYTHON_DATA_COL_TYPES) == 0) + continue; - // now it should be a column names - std::string colName = bp::extract(key); - dataframeColCount++; - auto tp = bp::extract(colTypes[dataframeColCount]); - ML_PY_TYPE_MAP_ENUM colType = static_cast(tp[0]); + // now it should be a column names + std::string colName = bp::extract(key); + dataframeColCount++; + auto tp = bp::extract(colTypes[dataframeColCount]); + ML_PY_TYPE_MAP_ENUM colType = static_cast(tp[0]); - BYTE kind; - void *pgetter; - bool isKey = false; - bool isNumeric = false; - bool isText = false; - CxInt64 vecCard = -1; - // Numeric or bool values. - if (bp::extract(value).check()) - { - isNumeric = true; - np::ndarray val = bp::extract(value); - switch (colType) - { - case (ML_PY_BOOL): - kind = BL; - pgetter = (void*)&GetBL; - break; - case (ML_PY_BOOL64): - kind = BL; - pgetter = (void*)&GetBL64; - break; - case (ML_PY_UINT8): - kind = U1; - pgetter = (void*)&GetU1; - break; - case (ML_PY_UINT16): - kind = U2; - pgetter = (void*)&GetU2; - break; - case (ML_PY_UINT32): - kind = U4; - pgetter = (void*)&GetU4; - break; - case (ML_PY_UINT64): - kind = U8; - pgetter = (void*)&GetU8; - break; - case (ML_PY_INT8): - kind = I1; - pgetter = (void*)&GetI1; - break; - case (ML_PY_INT16): - kind = I2; - pgetter = (void*)&GetI2; - break; - case (ML_PY_INT32): - kind = I4; - pgetter = (void*)&GetI4; - break; - case (ML_PY_INT64): - kind = I8; - pgetter = (void*)&GetI8; - break; - case (ML_PY_FLOAT16): - // What to do with numpy.float16 ? - throw std::invalid_argument("numpy.float16 data type is not supported"); - case (ML_PY_FLOAT32): - kind = R4; - pgetter = (void*)&GetR4; - break; - case (ML_PY_FLOAT64): - kind = R8; - pgetter = (void*)&GetR8; - break; - default: - throw std::invalid_argument("column " + colName + " has unsupported type"); - } - const char *data = val.get_data(); - this->_vdata.push_back(data); + BYTE kind; + void *pgetter; + bool isKey = false; + bool isNumeric = false; + bool isText = false; + CxInt64 vecCard = -1; + // Numeric or bool values. 
+ if (bp::extract(value).check()) + { + isNumeric = true; + np::ndarray val = bp::extract(value); + switch (colType) + { + case (ML_PY_BOOL): + kind = BL; + pgetter = (void*)&GetBL; + break; + case (ML_PY_BOOL64): + kind = BL; + pgetter = (void*)&GetBL64; + break; + case (ML_PY_UINT8): + kind = U1; + pgetter = (void*)&GetU1; + break; + case (ML_PY_UINT16): + kind = U2; + pgetter = (void*)&GetU2; + break; + case (ML_PY_UINT32): + kind = U4; + pgetter = (void*)&GetU4; + break; + case (ML_PY_UINT64): + kind = U8; + pgetter = (void*)&GetU8; + break; + case (ML_PY_INT8): + kind = I1; + pgetter = (void*)&GetI1; + break; + case (ML_PY_INT16): + kind = I2; + pgetter = (void*)&GetI2; + break; + case (ML_PY_INT32): + kind = I4; + pgetter = (void*)&GetI4; + break; + case (ML_PY_INT64): + kind = I8; + pgetter = (void*)&GetI8; + break; + case (ML_PY_FLOAT16): + // What to do with numpy.float16 ? + throw std::invalid_argument("numpy.float16 data type is not supported"); + case (ML_PY_FLOAT32): + kind = R4; + pgetter = (void*)&GetR4; + break; + case (ML_PY_FLOAT64): + kind = R8; + pgetter = (void*)&GetR8; + break; + case (ML_PY_DATETIME): + kind = DT; + pgetter = (void*)&GetI8; + break; + default: + throw std::invalid_argument("column " + colName + " has unsupported type"); + } + const char *data = val.get_data(); + this->_vdata.push_back(data); - assert(this->_mpnum.size() == dataframeColCount); - this->_mpnum.push_back(_vdata.size() - 1); - if (llTotalNumRows == -1) - llTotalNumRows = val.shape(0); - else - assert(llTotalNumRows == val.shape(0)); - } - // Text or key values. - else if (bp::extract(value).check()) - { - bp::list list = bp::extract(value); + assert(this->_mpnum.size() == dataframeColCount); + this->_mpnum.push_back(_vdata.size() - 1); + if (llTotalNumRows == -1) + llTotalNumRows = val.shape(0); + else + assert(llTotalNumRows == val.shape(0)); + } + // Text or key values. + else if (bp::extract(value).check()) + { + bp::list list = bp::extract(value); - // Key values. - switch (colType) - { - case (ML_PY_CAT): - if (varInfo.contains(colName)) - { - isKey = true; - assert(bp::extract(varInfo[colName]).check()); - bp::list keyNames = bp::extract(varInfo[colName]); + // Key values. + switch (colType) + { + case (ML_PY_CAT): + if (varInfo.contains(colName)) + { + isKey = true; + assert(bp::extract(varInfo[colName]).check()); + bp::list keyNames = bp::extract(varInfo[colName]); - kind = U4; - pgetter = (void*)GetKeyInt; + kind = U4; + pgetter = (void*)GetKeyInt; - // TODO: Handle vectors. - this->_vkeyCard.push_back(len(keyNames)); - //this->_vvecCard.push_back(vecCard); - this->_vkeydata.push_back(list); - this->_vkeynames.push_back(keyNames); + // TODO: Handle vectors. + this->_vkeyCard.push_back(len(keyNames)); + //this->_vvecCard.push_back(vecCard); + this->_vkeydata.push_back(list); + this->_vkeynames.push_back(keyNames); - assert(this->_mpkey.size() == dataframeColCount); - this->_mpkey.push_back(_vkeydata.size() - 1); - if (llTotalNumRows == -1) - llTotalNumRows = len(list); - else - assert(llTotalNumRows == len(list)); - } - else - continue; - break; - // Text values. 
- case (ML_PY_TEXT): - case (ML_PY_UNICODE): - isText = true; - kind = TX; - if (colType == ML_PY_TEXT) - pgetter = (void*)GetTX; - else // colType is "unicode" - // in python 2.7 strings can be passed as unicode bytestring (NOT the same as UTF8 encoded strings) - pgetter = (void*)GetUnicodeTX; + assert(this->_mpkey.size() == dataframeColCount); + this->_mpkey.push_back(_vkeydata.size() - 1); + if (llTotalNumRows == -1) + llTotalNumRows = len(list); + else + assert(llTotalNumRows == len(list)); + } + else + continue; + break; + // Text values. + case (ML_PY_TEXT): + case (ML_PY_UNICODE): + isText = true; + kind = TX; + if (colType == ML_PY_TEXT) + pgetter = (void*)GetTX; + else // colType is "unicode" + // in python 2.7 strings can be passed as unicode bytestring (NOT the same as UTF8 encoded strings) + pgetter = (void*)GetUnicodeTX; - // TODO: Handle vectors. - //this->_vvecCard.push_back(vecCard); - this->_vtextdata.push_back(list); + // TODO: Handle vectors. + //this->_vvecCard.push_back(vecCard); + this->_vtextdata.push_back(list); - assert(this->_mptxt.size() == dataframeColCount); - this->_mptxt.push_back(_vtextdata.size() - 1); - if (llTotalNumRows == -1) - llTotalNumRows = len(list); - else - assert(llTotalNumRows == len(list)); - break; - default: - throw std::invalid_argument("column " + colName + " has unsupported type"); - } - } - // A sparse vector. - else if (bp::extract(value).check()) - { - bp::dict sparse = bp::extract(value); - np::ndarray indices = bp::extract(sparse["indices"]); - _sparseIndices = (int*)indices.get_data(); - np::ndarray indptr = bp::extract(sparse["indptr"]); - _indPtr = (int*)indptr.get_data(); + assert(this->_mptxt.size() == dataframeColCount); + this->_mptxt.push_back(_vtextdata.size() - 1); + if (llTotalNumRows == -1) + llTotalNumRows = len(list); + else + assert(llTotalNumRows == len(list)); + break; + default: + throw std::invalid_argument("column " + colName + " has unsupported type"); + } + } + // A sparse vector. 
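The sparse branch below consumes a scipy CSR dict with `values`, `indices`, `indptr`, and `colCount` entries; the `Get*Vector` callbacks later in DataViewInterop.h slice one row at a time as `size = indptr[row + 1] - indptr[row]`. An illustrative sketch of that slicing (not part of this patch):

```csharp
using System;

static class CsrRowSketch
{
    // The non-zeros of a row are stored contiguously in values/indices,
    // starting at indPtr[row] and ending before indPtr[row + 1].
    static void PrintCsrRow(float[] values, int[] indices, int[] indPtr, int row)
    {
        int start = indPtr[row];
        int size = indPtr[row + 1] - start;
        for (int i = 0; i < size; i++)
            Console.WriteLine($"col {indices[start + i]} = {values[start + i]}");
    }

    static void Main()
    {
        // The 2x4 matrix [[0, 5, 0, 0], [7, 0, 0, 9]] in CSR form.
        var values = new float[] { 5, 7, 9 };
        var indices = new[] { 1, 0, 3 };
        var indPtr = new[] { 0, 1, 3 };
        PrintCsrRow(values, indices, indPtr, 1);   // prints "col 0 = 7" and "col 3 = 9"
    }
}
```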
+ else if (bp::extract(value).check()) + { + bp::dict sparse = bp::extract(value); + np::ndarray indices = bp::extract(sparse["indices"]); + _sparseIndices = (int*)indices.get_data(); + np::ndarray indptr = bp::extract(sparse["indptr"]); + _indPtr = (int*)indptr.get_data(); - np::ndarray values = bp::extract(sparse["values"]); - _sparseValues = values.get_data(); - switch (colType) - { - case (ML_PY_BOOL): - kind = BL; - pgetter = (void*)&GetBLVector; - break; - case (ML_PY_UINT8): - kind = U1; - pgetter = (void*)&GetU1Vector; - break; - case (ML_PY_UINT16): - kind = U2; - pgetter = (void*)&GetU2Vector; - break; - case (ML_PY_UINT32): - kind = U4; - pgetter = (void*)&GetU4Vector; - break; - case (ML_PY_UINT64): - kind = U8; - pgetter = (void*)&GetU8Vector; - break; - case (ML_PY_INT8): - kind = I1; - pgetter = (void*)&GetI1Vector; - break; - case (ML_PY_INT16): - kind = I2; - pgetter = (void*)&GetI2Vector; - break; - case (ML_PY_INT32): - kind = I4; - pgetter = (void*)&GetI4Vector; - break; - case (ML_PY_INT64): - kind = I8; - pgetter = (void*)&GetI8Vector; - break; - case (ML_PY_FLOAT16): - throw std::invalid_argument("numpy.float16 data type is not supported in sparse data"); - case (ML_PY_FLOAT32): - kind = R4; - pgetter = (void*)&GetR4Vector; - break; - case (ML_PY_FLOAT64): - kind = R8; - pgetter = (void*)&GetR8Vector; - break; - default: - throw std::invalid_argument("column " + colName + " has unsupported type"); - } - vecCard = bp::extract(sparse["colCount"]); - name = (char*)"Data"; + np::ndarray values = bp::extract(sparse["values"]); + _sparseValues = values.get_data(); + switch (colType) + { + case (ML_PY_BOOL): + kind = BL; + pgetter = (void*)&GetBLVector; + break; + case (ML_PY_UINT8): + kind = U1; + pgetter = (void*)&GetU1Vector; + break; + case (ML_PY_UINT16): + kind = U2; + pgetter = (void*)&GetU2Vector; + break; + case (ML_PY_UINT32): + kind = U4; + pgetter = (void*)&GetU4Vector; + break; + case (ML_PY_UINT64): + kind = U8; + pgetter = (void*)&GetU8Vector; + break; + case (ML_PY_INT8): + kind = I1; + pgetter = (void*)&GetI1Vector; + break; + case (ML_PY_INT16): + kind = I2; + pgetter = (void*)&GetI2Vector; + break; + case (ML_PY_INT32): + kind = I4; + pgetter = (void*)&GetI4Vector; + break; + case (ML_PY_INT64): + kind = I8; + pgetter = (void*)&GetI8Vector; + break; + case (ML_PY_FLOAT16): + throw std::invalid_argument("numpy.float16 data type is not supported in sparse data"); + case (ML_PY_FLOAT32): + kind = R4; + pgetter = (void*)&GetR4Vector; + break; + case (ML_PY_FLOAT64): + kind = R8; + pgetter = (void*)&GetR8Vector; + break; + default: + throw std::invalid_argument("column " + colName + " has unsupported type"); + } + vecCard = bp::extract(sparse["colCount"]); + name = (char*)"Data"; - if (llTotalNumRows == -1) - llTotalNumRows = len(indptr) - 1; - else - assert(llTotalNumRows == len(indptr) - 1); - } - else - throw std::invalid_argument("unsupported data type provided"); + if (llTotalNumRows == -1) + llTotalNumRows = len(indptr) - 1; + else + assert(llTotalNumRows == len(indptr) - 1); + } + else + throw std::invalid_argument("unsupported data type provided"); - this->_vgetter.push_back(pgetter); - this->_vname.push_back(name); - this->_vkind.push_back(kind); - _vvecCard.push_back(vecCard); + this->_vgetter.push_back(pgetter); + this->_vname.push_back(name); + this->_vkind.push_back(kind); + _vvecCard.push_back(vecCard); - if (!isNumeric) - { - assert(this->_mpnum.size() == dataframeColCount); - this->_mpnum.push_back(-1); - } - if (!isKey) - { - 
assert(this->_mpkey.size() == dataframeColCount); - this->_mpkey.push_back(-1); - this->_vkeyCard.push_back(-1); - } - if (!isText) - { - assert(this->_mptxt.size() == dataframeColCount); - this->_mptxt.push_back(-1); - } - } + if (!isNumeric) + { + assert(this->_mpnum.size() == dataframeColCount); + this->_mpnum.push_back(-1); + } + if (!isKey) + { + assert(this->_mpkey.size() == dataframeColCount); + this->_mpkey.push_back(-1); + this->_vkeyCard.push_back(-1); + } + if (!isText) + { + assert(this->_mptxt.size() == dataframeColCount); + this->_mptxt.push_back(-1); + } + } - assert(_vname.size() <= (size_t)(dataframeColCount + 1)); + assert(_vname.size() <= (size_t)(dataframeColCount + 1)); - this->crow = llTotalNumRows; - this->ccol = this->_vname.size(); - this->getLabels = &GetKeyNames; + this->crow = llTotalNumRows; + this->ccol = this->_vname.size(); + this->getLabels = &GetKeyNames; - assert(this->ccol == this->_vkind.size()); - assert(this->ccol == this->_vkeyCard.size()); - assert(this->ccol == this->_vgetter.size()); + assert(this->ccol == this->_vkind.size()); + assert(this->ccol == this->_vkeyCard.size()); + assert(this->ccol == this->_vgetter.size()); - // This is used in Revo, but seems to not be needed here. - this->ids = nullptr; - - if (this->ccol > 0) - { - this->names = &this->_vname[0]; - this->kinds = &this->_vkind[0]; - this->keyCards = &this->_vkeyCard[0]; - this->vecCards = &this->_vvecCard[0]; - this->getters = &this->_vgetter[0]; - } - else - { - this->names = nullptr; - this->kinds = nullptr; - this->keyCards = nullptr; - this->vecCards = nullptr; - this->getters = nullptr; - } + if (this->ccol > 0) + { + this->names = &this->_vname[0]; + this->kinds = &this->_vkind[0]; + this->keyCards = &this->_vkeyCard[0]; + this->vecCards = &this->_vvecCard[0]; + this->getters = &this->_vgetter[0]; + } + else + { + this->names = nullptr; + this->kinds = nullptr; + this->keyCards = nullptr; + this->vecCards = nullptr; + this->getters = nullptr; + } } DataSourceBlock::~DataSourceBlock() { #if _MSC_VER - for (std::vector::iterator it = this->_vtextdata_cache.begin(); it != this->_vtextdata_cache.end(); ++it) { - char* tmp = *it; - if (tmp != NULL) - free(tmp); - } + for (std::vector::iterator it = this->_vtextdata_cache.begin(); it != this->_vtextdata_cache.end(); ++it) { + char* tmp = *it; + if (tmp != NULL) + free(tmp); + } #endif - FillDead(this->ccol); - FillDead(this->crow); + FillDead(this->ccol); + FillDead(this->crow); - FillDead(this->names); - FillDead(this->kinds); - FillDead(this->keyCards); - FillDead(this->vecCards); - FillDead(this->getters); - FillDead(this->getLabels); + FillDead(this->names); + FillDead(this->kinds); + FillDead(this->keyCards); + FillDead(this->vecCards); + FillDead(this->getters); + FillDead(this->getLabels); } diff --git a/src/NativeBridge/DataViewInterop.h b/src/NativeBridge/DataViewInterop.h index 0f3011fa..c764b285 100644 --- a/src/NativeBridge/DataViewInterop.h +++ b/src/NativeBridge/DataViewInterop.h @@ -25,482 +25,480 @@ using namespace boost::python; // REVIEW: Need to figure out proper story for multi-threaded execution. class DataSourceBlock { - // Fields that are visible to managed code come first and do not start with an underscore. - // Fields that are only visible to this code start with an underscore. + // Fields that are visible to managed code come first and do not start with an underscore. + // Fields that are only visible to this code start with an underscore. private: - // *** These fields are known by managed code. 
It is critical that this struct not have a vtable. - // It is also critical that the layout of this prefix NOT vary from release to release or build to build. - - // Number of columns. - CxInt64 ccol; - // Total number of rows. Zero for unknown. - CxInt64 crow; - - // Column ids. - const CxInt64 *ids; - // Column names. - const char **names; - // Column data kinds. - const BYTE *kinds; - // Column key type cardinalities. Zero for unbounded, -1 for non-key-types. - const CxInt64 *keyCards; - // Column vector type cardinalities. Zero for variable size, -1 for non-vector-types. - const CxInt64 *vecCards; - // The call back item getter function pointers. Currently only used for string - // values (nullptr for others). For strings these are GETSTR function pointers. - const void **getters; - - // Call back function for getting labels. - GETLABELS getLabels; + // *** These fields are known by managed code. It is critical that this struct not have a vtable. + // It is also critical that the layout of this prefix NOT vary from release to release or build to build. + + // Number of columns. + CxInt64 ccol; + // Total number of rows. Zero for unknown. + CxInt64 crow; + + // Column names. + const char **names; + // Column data kinds. + const BYTE *kinds; + // Column key type cardinalities. Zero for unbounded, -1 for non-key-types. + const CxInt64 *keyCards; + // Column vector type cardinalities. Zero for variable size, -1 for non-vector-types. + const CxInt64 *vecCards; + // The call back item getter function pointers. Currently only used for string + // values (nullptr for others). For strings these are GETSTR function pointers. + const void **getters; + + // Call back function for getting labels. + GETLABELS getLabels; private: - // *** Stuff below here is not known by the managed code. - - std::vector _mpnum; - std::vector _mptxt; - std::vector _mpkey; - - // The vectors below here are parallel. - - // Column names. - std::vector _vname; - // Column DataKind values. - std::vector _vkind; - // Column key type cardinalities. Zero for unbounded, -1 for non-key-types. - std::vector _vkeyCard; - // Column vector type cardinalities. Zero for variable size, -1 for non-vector-types. - std::vector _vvecCard; - // Data getters for the columns (null for non-text columns). - std::vector _vgetter; - - std::vector _vdata; - std::vector _vtextdata; - std::vector _vtextdata_cache; - std::vector _vkeydata; - std::vector _vkeynames; - - // Stores the sparse data. - // REVIEW: need better documentatoin here - is this a pointer, or buffer ? If buffer, why this is not a vector ? Where do we store type of values ? What is indptr ? - void* _sparseValues; - int* _sparseIndices; - int* _indPtr; + // *** Stuff below here is not known by the managed code. + + std::vector _mpnum; + std::vector _mptxt; + std::vector _mpkey; + + // The vectors below here are parallel. + + // Column names. + std::vector _vname; + // Column DataKind values. + std::vector _vkind; + // Column key type cardinalities. Zero for unbounded, -1 for non-key-types. + std::vector _vkeyCard; + // Column vector type cardinalities. Zero for variable size, -1 for non-vector-types. + std::vector _vvecCard; + // Data getters for the columns (null for non-text columns). + std::vector _vgetter; + + std::vector _vdata; + std::vector _vtextdata; + std::vector _vtextdata_cache; + std::vector _vkeydata; + std::vector _vkeynames; + + // Stores the sparse data. + // REVIEW: need better documentatoin here - is this a pointer, or buffer ? 
If buffer, why this is not a vector ? Where do we store type of values ? What is indptr ? + void* _sparseValues; + int* _sparseIndices; + int* _indPtr; public: - DataSourceBlock(bp::dict& data); - ~DataSourceBlock(); + DataSourceBlock(bp::dict& data); + ~DataSourceBlock(); private: - bp::object SelectItemForType(bp::list& container) - { - auto length = len(container); - - for (auto index = 0; index < length; index++) - { - bp::object item = container[index]; - - if (!item.is_none()) - { - return item; - } - } - - return bp::object(); - } - - // Callback methods. These are only needed from managed code via the embedded function pointers above, - // so can be private. - static MANAGED_CALLBACK(void) GetBL(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const signed char *charData = reinterpret_cast(pdata->_vdata[numCol]); - dst = charData[index]; - } - static MANAGED_CALLBACK(void) GetBL64(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const double *charData = reinterpret_cast(pdata->_vdata[numCol]); - if (boost::math::isnan(charData[index])) - dst = -1; - else - dst = (signed char)charData[index]; - } - static MANAGED_CALLBACK(void) GetU1(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned char &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const unsigned char *charData = reinterpret_cast(pdata->_vdata[numCol]); - dst = charData[index]; - } - static MANAGED_CALLBACK(void) GetU2(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned short &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const unsigned short *shortData = reinterpret_cast(pdata->_vdata[numCol]); - dst = shortData[index]; - } - static MANAGED_CALLBACK(void) GetU4(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned int &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const unsigned int *intData = reinterpret_cast(pdata->_vdata[numCol]); - dst = intData[index]; - } - static MANAGED_CALLBACK(void) GetU8(DataSourceBlock *pdata, int col, long index, /*out*/ CxUInt64 &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const CxUInt64 *longData = reinterpret_cast(pdata->_vdata[numCol]); - dst = longData[index]; - } - static MANAGED_CALLBACK(void) GetI1(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const signed char *charData = reinterpret_cast(pdata->_vdata[numCol]); - dst = charData[index]; - } - static MANAGED_CALLBACK(void) GetI2(DataSourceBlock *pdata, int col, long index, /*out*/ short &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const short *shortData = reinterpret_cast(pdata->_vdata[numCol]); - dst = shortData[index]; - } - static MANAGED_CALLBACK(void) GetI4(DataSourceBlock *pdata, int col, long index, /*out*/ int &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const int *intData = 
reinterpret_cast(pdata->_vdata[numCol]); - dst = intData[index]; - } - static MANAGED_CALLBACK(void) GetI8(DataSourceBlock *pdata, int col, long index, /*out*/ CxInt64 &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const CxInt64 *longData = reinterpret_cast(pdata->_vdata[numCol]); - dst = longData[index]; - } - static MANAGED_CALLBACK(void) GetR4(DataSourceBlock *pdata, int col, long index, /*out*/ float &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const float *floatData = reinterpret_cast(pdata->_vdata[numCol]); - dst = floatData[index]; - } - static MANAGED_CALLBACK(void) GetR8(DataSourceBlock *pdata, int col, long index, /*out*/ double &dst) - { - CxInt64 numCol = pdata->_mpnum[col]; - assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); - const double *doubleData = reinterpret_cast(pdata->_vdata[numCol]); - dst = doubleData[index]; - } - - // Call back from C# to map from data buffer and index to char* and convert to UTF16. - static MANAGED_CALLBACK(void) GetTX(DataSourceBlock *pdata, int col, long index, const/*out*/ char*& pch, /*out*/int32_t &size, /*out*/int32_t &missing) - { - CxInt64 txCol = pdata->_mptxt[col]; - assert(0 <= txCol && txCol < (CxInt64)pdata->_vtextdata.size()); - bp::object s = pdata->_vtextdata[txCol][index]; - - if (bp::extract(s).check()) - { - size = -1; - missing = -1; - pch = bp::extract(s); - if (s.is_none()) - { - size = 0; - pch = 0; - } - else - { + bp::object SelectItemForType(bp::list& container) + { + auto length = len(container); + + for (auto index = 0; index < length; index++) + { + bp::object item = container[index]; + + if (!item.is_none()) + { + return item; + } + } + + return bp::object(); + } + + // Callback methods. These are only needed from managed code via the embedded function pointers above, + // so can be private. 
+ static MANAGED_CALLBACK(void) GetBL(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const signed char *charData = reinterpret_cast(pdata->_vdata[numCol]); + dst = charData[index]; + } + static MANAGED_CALLBACK(void) GetBL64(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const double *charData = reinterpret_cast(pdata->_vdata[numCol]); + if (boost::math::isnan(charData[index])) + dst = -1; + else + dst = (signed char)charData[index]; + } + static MANAGED_CALLBACK(void) GetU1(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned char &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const unsigned char *charData = reinterpret_cast(pdata->_vdata[numCol]); + dst = charData[index]; + } + static MANAGED_CALLBACK(void) GetU2(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned short &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const unsigned short *shortData = reinterpret_cast(pdata->_vdata[numCol]); + dst = shortData[index]; + } + static MANAGED_CALLBACK(void) GetU4(DataSourceBlock *pdata, int col, long index, /*out*/ unsigned int &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const unsigned int *intData = reinterpret_cast(pdata->_vdata[numCol]); + dst = intData[index]; + } + static MANAGED_CALLBACK(void) GetU8(DataSourceBlock *pdata, int col, long index, /*out*/ CxUInt64 &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const CxUInt64 *longData = reinterpret_cast(pdata->_vdata[numCol]); + dst = longData[index]; + } + static MANAGED_CALLBACK(void) GetI1(DataSourceBlock *pdata, int col, long index, /*out*/ signed char &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const signed char *charData = reinterpret_cast(pdata->_vdata[numCol]); + dst = charData[index]; + } + static MANAGED_CALLBACK(void) GetI2(DataSourceBlock *pdata, int col, long index, /*out*/ short &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const short *shortData = reinterpret_cast(pdata->_vdata[numCol]); + dst = shortData[index]; + } + static MANAGED_CALLBACK(void) GetI4(DataSourceBlock *pdata, int col, long index, /*out*/ int &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const int *intData = reinterpret_cast(pdata->_vdata[numCol]); + dst = intData[index]; + } + static MANAGED_CALLBACK(void) GetI8(DataSourceBlock *pdata, int col, long index, /*out*/ CxInt64 &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const CxInt64 *longData = reinterpret_cast(pdata->_vdata[numCol]); + dst = longData[index]; + } + static MANAGED_CALLBACK(void) GetR4(DataSourceBlock *pdata, int col, long index, /*out*/ float &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const float *floatData = reinterpret_cast(pdata->_vdata[numCol]); + dst = floatData[index]; + } + static 
MANAGED_CALLBACK(void) GetR8(DataSourceBlock *pdata, int col, long index, /*out*/ double &dst) + { + CxInt64 numCol = pdata->_mpnum[col]; + assert(0 <= numCol && numCol < (CxInt64)pdata->_vdata.size()); + const double *doubleData = reinterpret_cast(pdata->_vdata[numCol]); + dst = doubleData[index]; + } + + // Call back from C# to map from data buffer and index to char* and convert to UTF16. + static MANAGED_CALLBACK(void) GetTX(DataSourceBlock *pdata, int col, long index, const/*out*/ char*& pch, /*out*/int32_t &size, /*out*/int32_t &missing) + { + CxInt64 txCol = pdata->_mptxt[col]; + assert(0 <= txCol && txCol < (CxInt64)pdata->_vtextdata.size()); + bp::object s = pdata->_vtextdata[txCol][index]; + + if (bp::extract(s).check()) + { + size = -1; + missing = -1; + pch = bp::extract(s); + if (s.is_none()) + { + size = 0; + pch = 0; + } + else + { #if _MSC_VER - Utf8ToUtf16le(pch, pch, size); + Utf8ToUtf16le(pch, pch, size); #endif - pdata->_vtextdata_cache.push_back((char*)pch); - } - } - else - { - // Missing values in Python are float.NaN. - assert(bp::extract(s).check()); - missing = 1; - } - } - - // The method below executes in python 2.7 only! + pdata->_vtextdata_cache.push_back((char*)pch); + } + } + else + { + // Missing values in Python are float.NaN. + assert(bp::extract(s).check()); + missing = 1; + } + } + + // The method below executes in python 2.7 only! // Call back from C# to get text data in UTF16 from unicode bytestring static MANAGED_CALLBACK(void) GetUnicodeTX(DataSourceBlock *pdata, int col, long index, const/*out*/ char*& pch, /*out*/int32_t &size, /*out*/int32_t &missing) - { + { CxInt64 txCol = pdata->_mptxt[col]; assert(0 <= txCol && txCol < (CxInt64)pdata->_vtextdata.size()); auto s = pdata->_vtextdata[txCol][index]; - if (bp::extract(str(s).encode("utf_8")).check()) - { - missing = -1; - pch = bp::extract(str(s).encode("utf_8")); + if (bp::extract(str(s).encode("utf_8")).check()) + { + missing = -1; + pch = bp::extract(str(s).encode("utf_8")); #if _MSC_VER Utf8ToUtf16le(pch, pch, size); #endif - pdata->_vtextdata_cache.push_back((char*)pch); - } - else - { - // Missing values in Python are float.NaN. - assert(bp::extract(s).check()); - missing = 1; - } + pdata->_vtextdata_cache.push_back((char*)pch); + } + else + { + // Missing values in Python are float.NaN. + assert(bp::extract(s).check()); + missing = 1; + } } #if _MSC_VER - static void Utf8ToUtf16le(const char* utf8Str, const/*out*/ char*& pch, /*out*/int &size) - { - // Allocate the utf16 string buffer. - size = MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, NULL, 0); - if (size == 0) - { - pch = 0; - return; - } - - wchar_t* utf16Str = new wchar_t[size]; - - try - { - // Convert the utf8 string. - MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, utf16Str, size); - } - catch (...) - { - // On exception clean up and re-throw. - if (utf16Str) delete[] utf16Str; - throw; - } - - // size includes a NULL character at the end, discount it - assert(utf16Str[size - 1] == L'\0'); - size -= 1; - pch = (char*)utf16Str; - } + static void Utf8ToUtf16le(const char* utf8Str, const/*out*/ char*& pch, /*out*/int &size) + { + // Allocate the utf16 string buffer. + size = MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, NULL, 0); + if (size == 0) + { + pch = 0; + return; + } + + wchar_t* utf16Str = new wchar_t[size]; + + try + { + // Convert the utf8 string. + MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, utf16Str, size); + } + catch (...) + { + // On exception clean up and re-throw. 
+ if (utf16Str) delete[] utf16Str; + throw; + } + + // size includes a NULL character at the end, discount it + assert(utf16Str[size - 1] == L'\0'); + size -= 1; + pch = (char*)utf16Str; + } #endif - static MANAGED_CALLBACK(void) GetKeyInt(DataSourceBlock *pdata, int col, long index, /*out*/ int& dst) - { - CxInt64 keyCol = pdata->_mpkey[col]; - assert(0 <= keyCol && keyCol < (CxInt64)pdata->_vkeydata.size()); - - auto & list = pdata->_vkeydata[keyCol]; - bp::object obj = pdata->SelectItemForType(list); - assert(strcmp(obj.ptr()->ob_type->tp_name, "int") == 0); - dst = bp::extract(list[index]); - } - - // Callback function for getting labels for key-type columns. Returns success. - static MANAGED_CALLBACK(bool) GetKeyNames(DataSourceBlock *pdata, int col, int count, const char **buffer) - { - if (count <= 0 || buffer == nullptr) - { - // Invalid count or buffer, don't zero out buffer returning. - assert(false); - return false; - } - if (pdata == nullptr) - { - // Invalid pdata. - return OnGetLabelsFailure(count, buffer); - } - if (0 > col || (size_t)col >= pdata->_mpkey.size()) - { - // Invalid column id. - return OnGetLabelsFailure(count, buffer); - } - if (pdata->_vkeyCard[col] != count) - { - // Column is not a key type. - return OnGetLabelsFailure(count, buffer); - } - - CxInt64 keyCol = pdata->_mpkey[col]; - bp::list & names = pdata->_vkeynames[keyCol]; - if (len(names) != count) - { - // No labels for this column. This is not a logic error. - return OnGetLabelsFailure(count, buffer); - } - - for (int i = 0; i < count; ++i, ++buffer) - *buffer = bp::extract(names[i]); - return true; - } - - static bool OnGetLabelsFailure(int count, const char **buffer) - { - assert(false); - for (int i = 0; i < count; i++) - buffer[i] = nullptr; - return false; - } - - // Same method has two modes: if "inquire" is true, it returns the number of indices/values needed for the current row. - // If "inquire" is false, it assumes that indices/values are big enough, and fills them in for the current row. 
- static MANAGED_CALLBACK(void) GetBLVector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned char* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const unsigned char *boolData = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = boolData[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetU1Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned char* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const unsigned char *int8Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int8Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetU2Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned short* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const unsigned short *int16Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int16Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetU4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned int* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const unsigned int *int32Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int32Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetU8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, CxUInt64* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const unsigned long *int64Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int64Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetI1Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, signed char* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const signed char *int8Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int8Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetI2Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, short* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const short *int16Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int16Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetI4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, int* values, bool inquire, /*out*/ 
int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const int *int32Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int32Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetI8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, CxInt64* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const CxInt64 *int64Data = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = int64Data[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetR4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, float* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const float *floatData = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = floatData[pdata->_indPtr[index] + i]; - } - } - - static MANAGED_CALLBACK(void) GetR8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, double* values, bool inquire, /*out*/ int &size) - { - size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; - if (inquire) - return; - - const double *doubleData = reinterpret_cast(pdata->_sparseValues); - for (int i = 0; i < size; i++) - { - indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; - values[i] = doubleData[pdata->_indPtr[index] + i]; - } - } + static MANAGED_CALLBACK(void) GetKeyInt(DataSourceBlock *pdata, int col, long index, /*out*/ int& dst) + { + CxInt64 keyCol = pdata->_mpkey[col]; + assert(0 <= keyCol && keyCol < (CxInt64)pdata->_vkeydata.size()); + + auto & list = pdata->_vkeydata[keyCol]; + bp::object obj = pdata->SelectItemForType(list); + assert(strcmp(obj.ptr()->ob_type->tp_name, "int") == 0); + dst = bp::extract(list[index]); + } + + // Callback function for getting labels for key-type columns. Returns success. + static MANAGED_CALLBACK(bool) GetKeyNames(DataSourceBlock *pdata, int col, int count, const char **buffer) + { + if (count <= 0 || buffer == nullptr) + { + // Invalid count or buffer, don't zero out buffer returning. + assert(false); + return false; + } + if (pdata == nullptr) + { + // Invalid pdata. + return OnGetLabelsFailure(count, buffer); + } + if (0 > col || (size_t)col >= pdata->_mpkey.size()) + { + // Invalid column id. + return OnGetLabelsFailure(count, buffer); + } + if (pdata->_vkeyCard[col] != count) + { + // Column is not a key type. + return OnGetLabelsFailure(count, buffer); + } + + CxInt64 keyCol = pdata->_mpkey[col]; + bp::list & names = pdata->_vkeynames[keyCol]; + if (len(names) != count) + { + // No labels for this column. This is not a logic error. + return OnGetLabelsFailure(count, buffer); + } + + for (int i = 0; i < count; ++i, ++buffer) + *buffer = bp::extract(names[i]); + return true; + } + + static bool OnGetLabelsFailure(int count, const char **buffer) + { + assert(false); + for (int i = 0; i < count; i++) + buffer[i] = nullptr; + return false; + } + + // Same method has two modes: if "inquire" is true, it returns the number of indices/values needed for the current row. 
+ // If "inquire" is false, it assumes that indices/values are big enough, and fills them in for the current row. + static MANAGED_CALLBACK(void) GetBLVector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned char* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const unsigned char *boolData = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = boolData[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetU1Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned char* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const unsigned char *int8Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int8Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetU2Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned short* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const unsigned short *int16Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int16Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetU4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, unsigned int* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const unsigned int *int32Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int32Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetU8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, CxUInt64* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const unsigned long *int64Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int64Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetI1Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, signed char* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const signed char *int8Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int8Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetI2Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, short* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const short *int16Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int16Data[pdata->_indPtr[index] + i]; + } + } + + static 
MANAGED_CALLBACK(void) GetI4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, int* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const int *int32Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int32Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetI8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, CxInt64* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const CxInt64 *int64Data = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = int64Data[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetR4Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, float* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const float *floatData = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = floatData[pdata->_indPtr[index] + i]; + } + } + + static MANAGED_CALLBACK(void) GetR8Vector(DataSourceBlock *pdata, int col, CxInt64 index, int* indices, double* values, bool inquire, /*out*/ int &size) + { + size = pdata->_indPtr[index + 1] - pdata->_indPtr[index]; + if (inquire) + return; + + const double *doubleData = reinterpret_cast(pdata->_sparseValues); + for (int i = 0; i < size; i++) + { + indices[i] = pdata->_sparseIndices[pdata->_indPtr[index] + i]; + values[i] = doubleData[pdata->_indPtr[index] + i]; + } + } }; // A native wrapper around a managed IDataView for receiving data back from managed code. @@ -508,40 +506,45 @@ class DataSourceBlock // This is filled in by managed code and referenced by native code. struct DataViewBlock { - // *** These fields are shared from managed code. It is critical that this struct not have a vtable. - // It is also critical that the layout of this NOT vary from release to release or build to build. - // The managed code assumes that CxInt64 occupies 8 bytes, and each pointer occupies 8 bytes. - - // Number of columns. - CxInt64 ccol; - // Total number of rows. Zero for unknown. - CxInt64 crow; - - // Column names. - const char **names; - // Column data kinds. - const BYTE *kinds; - // Column key type cardinalities. Only contains the values for the columns that have - // key names. - const int *keyCards; + // *** These fields are shared from managed code. It is critical that this struct not have a vtable. + // It is also critical that the layout of this NOT vary from release to release or build to build. + // The managed code assumes that CxInt64 occupies 8 bytes, and each pointer occupies 8 bytes. + + // Number of columns. + CxInt64 ccol; + // Total number of rows. Zero for unknown. + CxInt64 crow; + + // Column names. + const char **names; + // Column data kinds. + const BYTE *kinds; + // Column key type cardinalities. Only contains the values for the columns that have + // key names. + const int *keyCards; + // The number of values in each row of a column. + // A value count of 0 means that each row of the + // column is variable length. 
+ const BYTE *valueCounts; }; enum ML_PY_TYPE_MAP_ENUM { - ML_PY_BOOL = '?', - ML_PY_BOOL64 = '!', - ML_PY_UINT8 = 'B', - ML_PY_UINT16 = 'H', - ML_PY_UINT32 = 'I', - ML_PY_UINT64 = 'Q', - ML_PY_INT8 = 'b', - ML_PY_INT16 = 'h', - ML_PY_INT32 = 'i', - ML_PY_INT64 = 'q', - ML_PY_FLOAT16 = 'e', - ML_PY_FLOAT32 = 'f', - ML_PY_FLOAT64 = 'd', - ML_PY_CAT = 'c', - ML_PY_TEXT = 't', - ML_PY_UNICODE = 'u', - ML_PY_UNSUPPORTED = 'x' + ML_PY_BOOL = '?', + ML_PY_BOOL64 = '!', + ML_PY_UINT8 = 'B', + ML_PY_UINT16 = 'H', + ML_PY_UINT32 = 'I', + ML_PY_UINT64 = 'Q', + ML_PY_INT8 = 'b', + ML_PY_INT16 = 'h', + ML_PY_INT32 = 'i', + ML_PY_INT64 = 'q', + ML_PY_FLOAT16 = 'e', + ML_PY_FLOAT32 = 'f', + ML_PY_FLOAT64 = 'd', + ML_PY_CAT = 'c', + ML_PY_TEXT = 't', + ML_PY_UNICODE = 'u', + ML_PY_DATETIME = 'z', + ML_PY_UNSUPPORTED = 'x' }; diff --git a/src/NativeBridge/ManagedInterop.cpp b/src/NativeBridge/ManagedInterop.cpp index bca89755..6a80000d 100644 --- a/src/NativeBridge/ManagedInterop.cpp +++ b/src/NativeBridge/ManagedInterop.cpp @@ -6,322 +6,267 @@ #include "DataViewInterop.h" #include "ManagedInterop.h" -inline void destroyManagerCObject(PyObject* obj) { - auto* b = static_cast(PyCapsule_GetPointer(obj, NULL)); - if (b) { delete b; } -} - -#define SetDict2(cpptype, nptype); \ - {\ - PythonObject* col = dynamic_cast*>(column);\ - auto shrd = col->GetData();\ - auto* data = shrd->data();\ - bp::handle<> h(::PyCapsule_New((void*)column, NULL, (PyCapsule_Destructor)&destroyManagerCObject));\ - dict[_names[i]] = np::from_data(\ - data,\ - np::dtype::get_builtin(),\ - bp::make_tuple(shrd->size()),\ - bp::make_tuple(sizeof(nptype)), bp::object(h));\ - } - -#define SetDict1(type) SetDict2(type, type) -#define SetDictAndKeys(type, i); \ - {\ - PythonObject* col = dynamic_cast*>(column);\ - auto shrd = col->GetData();\ - auto* data = shrd->data();\ - bp::handle<> h(::PyCapsule_New((void*)column, NULL, (PyCapsule_Destructor)&destroyManagerCObject));\ - np::ndarray npdata = np::from_data(\ - data,\ - np::dtype::get_builtin(),\ - bp::make_tuple(shrd->size()),\ - bp::make_tuple(sizeof(float)), bp::object(h));\ - if (keyNames == nullptr)\ - {\ - dict[_names[i]] = npdata;\ - }\ - else\ - {\ - dict[_names[i]] = bp::dict();\ - dict[_names[i]]["..Data"] = npdata;\ - auto shrd = keyNames->GetData();\ - bp::list list;\ - for (int j = 0; j < shrd->size(); j++)\ - {\ - bp::object obj;\ - const std::string& value = shrd->at(j);\ - if (!value.empty())\ - {\ - obj = bp::object(value);\ - }\ - list.append(obj);\ - }\ - dict[_names[i]]["..KeyValues"] = list;\ - }\ - }\ +#define AddToDict(type); \ + {\ + PyColumn* col = dynamic_cast*>(column);\ + col->AddToDict(dict, _names[i], keyNames, maxRows);\ + }\ #define STATIC + EnvironmentBlock::~EnvironmentBlock() { - // Everything (except data buffers) that we might have exposed to managed code, - // fill with dead values. - FillDead(this->verbosity); - FillDead(this->seed); - FillDead(this->messageSink); - FillDead(this->modelSink); - FillDead(this->checkCancel); + // Everything (except data buffers) that we might have exposed to managed code, + // fill with dead values. 
+ FillDead(this->verbosity); + FillDead(this->seed); + FillDead(this->maxSlots); + FillDead(this->messageSink); + FillDead(this->modelSink); + FillDead(this->checkCancel); - for (size_t i = 0; i < _vset.size(); i++) - FillDead(_vset[i]); + for (size_t i = 0; i < _vset.size(); i++) + FillDead(_vset[i]); } -EnvironmentBlock::EnvironmentBlock(int verbosity, int maxThreadsAllowed, int seed, const char* pythonPath) +EnvironmentBlock::EnvironmentBlock(int verbosity, int maxSlots, int seed, const char* pythonPath) { - // Assert that this class doesn't have a vtable. - assert(offsetof(EnvironmentBlock, verbosity) == 0); + // Assert that this class doesn't have a vtable. + assert(offsetof(EnvironmentBlock, verbosity) == 0); - this->_errCode = PyErrorCode_NoError; - this->verbosity = verbosity; - this->maxThreadsAllowed = maxThreadsAllowed; - this->seed = seed; - this->pythonPath = pythonPath; - this->_kindMask = (1 << Warning) | (1 << Error); - if (verbosity > 0) - this->_kindMask |= (1 << Info); - if (this->verbosity > 3) - this->_kindMask |= (1 << Trace); - this->dataSink = &DataSink; - this->messageSink = &MessageSink; - this->modelSink = &ModelSink; - this->checkCancel = &CheckCancel; + this->verbosity = verbosity; + this->maxSlots = maxSlots; + this->seed = seed; + this->pythonPath = pythonPath; + this->_kindMask = (1 << Warning) | (1 << Error); + if (verbosity > 0) + this->_kindMask |= (1 << Info); + if (this->verbosity > 3) + this->_kindMask |= (1 << Trace); + this->dataSink = &DataSink; + this->messageSink = &MessageSink; + this->modelSink = &ModelSink; + this->checkCancel = &CheckCancel; } STATIC MANAGED_CALLBACK(void) EnvironmentBlock::DataSink(EnvironmentBlock *penv, const DataViewBlock *pdata, void **&setters, void *&keyValueSetter) { - penv->DataSinkCore(pdata); - setters = &penv->_vset[0]; - keyValueSetter = (void *)&SetKeyValue; + penv->DataSinkCore(pdata); + setters = &penv->_vset[0]; + keyValueSetter = (void *)&SetKeyValue; } void EnvironmentBlock::DataSinkCore(const DataViewBlock * pdata) { - assert(pdata != nullptr); + assert(pdata != nullptr); - // Create a data set. 
- CxInt64 numKeys = 0; - for (int i = 0; i < pdata->ccol; i++) - { - BYTE kind = pdata->kinds[i]; - _columns.push_back(PythonObjectBase::CreateObject(kind, pdata->crow, 1)); + for (int i = 0; i < pdata->ccol; i++) + { + BYTE kind = pdata->kinds[i]; + _columns.push_back(PyColumnBase::Create(kind, pdata->crow, pdata->valueCounts[i])); - switch (kind) - { - case BL: - _vset.push_back((void*)&SetBL); - break; - case I1: - _vset.push_back((void*)&SetI1); - break; - case I2: - _vset.push_back((void*)&SetI2); - break; - case I4: - _vset.push_back((void*)&SetI4); - break; - case I8: - _vset.push_back((void*)&SetI8); - break; - case U1: - _vset.push_back((void*)&SetU1); - break; - case U2: - _vset.push_back((void*)&SetU2); - break; - case U4: - _vset.push_back((void*)&SetU4); - break; - case U8: - _vset.push_back((void*)&SetU8); - break; - case R4: - _vset.push_back((void*)&SetR4); - break; - case R8: - _vset.push_back((void*)&SetR8); - break; - case TX: - _vset.push_back((void*)&SetTX); - break; - case TS: // tbd - case DT: // tbd - case DZ: // tbd - default: - throw std::invalid_argument("data type is not supported " + std::to_string(kind)); - } + switch (kind) + { + case BL: + _vset.push_back((void*)&SetBL); + break; + case I1: + _vset.push_back((void*)&SetI1); + break; + case I2: + _vset.push_back((void*)&SetI2); + break; + case I4: + _vset.push_back((void*)&SetI4); + break; + case DT: + case I8: + _vset.push_back((void*)&SetI8); + break; + case U1: + _vset.push_back((void*)&SetU1); + break; + case U2: + _vset.push_back((void*)&SetU2); + break; + case U4: + _vset.push_back((void*)&SetU4); + break; + case U8: + _vset.push_back((void*)&SetU8); + break; + case R4: + _vset.push_back((void*)&SetR4); + break; + case R8: + _vset.push_back((void*)&SetR8); + break; + case TX: + _vset.push_back((void*)&SetTX); + break; + case TS: // tbd + case DZ: // tbd + default: + throw std::invalid_argument("data type is not supported " + std::to_string(kind)); + } - if (pdata->keyCards[i] >= 0) - { - _vKeyValues.push_back(new PythonObject(TX, pdata->keyCards[i], 1)); - _columnToKeyMap.push_back(numKeys++); - } - else - _columnToKeyMap.push_back(-1); + if (pdata->keyCards && (pdata->keyCards[i] >= 0)) + { + _columnToKeyMap.insert(i); + _vKeyValues.push_back(new PyColumnSingle(TX, pdata->keyCards[i])); + } - _names.push_back(pdata->names[i]); - } + _names.push_back(pdata->names[i]); + } } STATIC MANAGED_CALLBACK(void) EnvironmentBlock::ModelSink(EnvironmentBlock * env, - const unsigned char * pBinaryModel, size_t iModelLen) + const unsigned char * pBinaryModel, size_t iModelLen) { } STATIC MANAGED_CALLBACK(void) EnvironmentBlock::MessageSink(EnvironmentBlock * env, MessageKind kind, - const char * sender, const char * message) + const char * sender, const char * message) { - bool bShowMessage = (env->_kindMask >> kind) & 1; - string sMessage(message); - string sSender(sender); + bool bShowMessage = (env->_kindMask >> kind) & 1; + string sMessage(message); + string sSender(sender); - if (bShowMessage) - { - CX_TraceIn("MessageSink"); - string sMessage = std::string(message); - string sSender = std::string(sender); + if (bShowMessage) + { + CX_TraceIn("MessageSink"); + string sMessage = std::string(message); + string sSender = std::string(sender); - switch (kind) - { - default: - case Info: - sMessage = sMessage + "\n"; - break; - case Warning: - sMessage = "Warning: " + sMessage + "\n"; - break; - case Trace: - sMessage = sSender + ": " + sMessage + "\n"; - break; - case Error: // We will throw the error when 
ConnectToMlNet returns - sMessage = "Error: " + sMessage; - break; - } + switch (kind) + { + default: + case Info: + sMessage = sMessage + "\n"; + break; + case Warning: + sMessage = "Warning: " + sMessage + "\n"; + break; + case Trace: + sMessage = sSender + ": " + sMessage + "\n"; + break; + case Error: // We will throw the error when ConnectToMlNet returns + sMessage = "Error: " + sMessage; + env->_errMessage = sMessage; + break; + } - // Redirect message to Python streams - PyObject *sys = PyImport_ImportModule("sys"); - PyObject *pystream = PyObject_GetAttrString(sys, (kind == Error) ? "stderr" : "stdout"); - PyObject_CallMethod(pystream, "write", "s", sMessage.c_str()); - PyObject_CallMethod(pystream, "flush", NULL); - Py_XDECREF(pystream); - Py_XDECREF(sys); + // Redirect message to Python streams + PyObject *sys = PyImport_ImportModule("sys"); + PyObject *pystream = PyObject_GetAttrString(sys, (kind == Error) ? "stderr" : "stdout"); + PyObject_CallMethod(pystream, "write", "s", sMessage.c_str()); + PyObject_CallMethod(pystream, "flush", NULL); + Py_XDECREF(pystream); + Py_XDECREF(sys); - CX_TraceOut("MessageSink"); - } + CX_TraceOut("MessageSink"); + } } STATIC MANAGED_CALLBACK(bool) EnvironmentBlock::CheckCancel() { - return false; + return false; } bp::dict EnvironmentBlock::GetData() { - if (_names.size() == 0) - { - return bp::dict(); - } + if (_columns.size() == 0) + { + return bp::dict(); + } + + size_t maxRows = 0; + for (size_t i = 0; i < _columns.size(); i++) + { + size_t numRows = _columns[i]->GetNumRows(); + if (numRows > maxRows) maxRows = numRows; + } - bp::dict dict = bp::dict(); - for (size_t i = 0; i < _names.size(); i++) - { - PythonObjectBase* column = _columns[i]; - PythonObject* keyNames = nullptr; - if (_columnToKeyMap[i] >= 0) - keyNames = _vKeyValues[_columnToKeyMap[i]]; + CxInt64 numKeys = 0; + bp::dict dict = bp::dict(); + for (size_t i = 0; i < _columns.size(); i++) + { + PyColumnBase* column = _columns[i]; + const std::vector* keyNames = nullptr; + if (_columnToKeyMap.find(i) != _columnToKeyMap.end()) + keyNames = _vKeyValues[numKeys++]->GetData(); - signed char kind = column->GetKind(); - switch (kind) { - case -1: - { - PythonObject* col = dynamic_cast*>(column); - auto shrd = col->GetData(); - bp::list list; - for (size_t i = 0; i < shrd->size(); i++) - { - bp::object obj; - signed char value = shrd->at(i); - if (value < 0) - obj = bp::object(NAN); - else if (value == 0) - obj = bp::object(false); - else - obj = bp::object(true); + signed char kind = column->GetKind(); + switch (kind) { + case -1: + { + PyColumnSingle* col = dynamic_cast*>(column); + auto shrd = col->GetData(); + bp::list list; + for (size_t i = 0; i < shrd->size(); i++) + { + bp::object obj; + signed char value = shrd->at(i); + if (value < 0) + obj = bp::object(NAN); + else if (value == 0) + obj = bp::object(false); + else + obj = bp::object(true); - list.append(obj); - } - dict[_names[i]] = list; - } - break; - case BL: - SetDict2(signed char, bool); - break; - case I1: - SetDictAndKeys(signed char, i); - break; - case I2: - SetDictAndKeys(signed short, i); - break; - case I4: - SetDictAndKeys(signed int, i); - break; - case I8: - SetDict1(CxInt64); - break; - case U1: - SetDict1(unsigned char); - break; - case U2: - SetDict1(unsigned short); - break; - case U4: - SetDict1(unsigned int); - break; - case U8: - SetDict1(CxUInt64); - break; - case R4: - SetDict1(float); - break; - case R8: - SetDict1(double); - break; - case TX: - { - PythonObject* col = dynamic_cast*>(column); - auto 
shrd = col->GetData(); - bp::list list; - for (size_t i = 0; i < shrd->size(); i++) - { - bp::object obj; - const std::string& value = shrd->at(i); - if (!value.empty()) - { - obj = bp::object(value); - } - list.append(obj); - } - dict[_names[i]] = list; - delete column; - } - break; - case TS: - case DT: - case DZ: - default: - throw std::invalid_argument("data type is not supported " + std::to_string(kind)); - } - } - return dict; + list.append(obj); + } + dict[_names[i]] = list; + } + break; + case BL: + AddToDict(signed char); + break; + case I1: + AddToDict(signed char); + break; + case I2: + AddToDict(signed short); + break; + case I4: + AddToDict(signed int); + break; + case I8: + AddToDict(CxInt64); + break; + case U1: + AddToDict(unsigned char); + break; + case U2: + AddToDict(unsigned short); + break; + case U4: + AddToDict(unsigned int); + break; + case U8: + AddToDict(CxUInt64); + break; + case R4: + AddToDict(float); + break; + case R8: + AddToDict(double); + break; + case TX: + AddToDict(std::string); + delete column; + break; + case DT: + AddToDict(CxInt64); + break; + case TS: + case DZ: + default: + throw std::invalid_argument("data type is not supported " + std::to_string(kind)); + } + } + return dict; } diff --git a/src/NativeBridge/ManagedInterop.h b/src/NativeBridge/ManagedInterop.h index 5d9582a3..485a59cc 100644 --- a/src/NativeBridge/ManagedInterop.h +++ b/src/NativeBridge/ManagedInterop.h @@ -4,6 +4,7 @@ using namespace std; #include "stdafx.h" #include "PythonInterop.h" +#include #define CX_TraceOut(...) #define CX_TraceIn(...) @@ -15,32 +16,25 @@ struct DataViewBlock; // WARNING: These values are defined by the ML.NET code so should not be changed! enum MessageKind { - Trace = 0, - Info = 1, - Warning = 2, - Error = 3 -}; - -// These are only used locally -enum PyErrorCode -{ - PyErrorCode_NoError = 0, - PyErrorCode_Failure = 1 + Trace = 0, + Info = 1, + Warning = 2, + Error = 3 }; // REVIEW: the exceptions thrown in the callbacks will not be caught by BxlServer on Linux. // On Linux, CoreCLR will ignore previous stack frames, i.e., those before entering the managed code. typedef MANAGED_CALLBACK_PTR(void, MODELSINK) (EnvironmentBlock * env, - const unsigned char * binaryModel, size_t modelLen); + const unsigned char * binaryModel, size_t modelLen); typedef MANAGED_CALLBACK_PTR(void, MESSAGESINK)(EnvironmentBlock *penv, MessageKind kind, - const char * sender, const char * message); + const char * sender, const char * message); typedef MANAGED_CALLBACK_PTR(void, DATASINK)(EnvironmentBlock *penv, const DataViewBlock *pdata, - // Outputs: - // * setters: item setter function pointers. - // keyValueSetter: setter for key values. - void **& setters, void *& keyValueSetter); + // Outputs: + // * setters: item setter function pointers. + // keyValueSetter: setter for key values. + void **& setters, void *& keyValueSetter); // Callback function for getting cancel flag. typedef MANAGED_CALLBACK_PTR(bool, CHECKCANCEL)(); @@ -52,152 +46,150 @@ typedef MANAGED_CALLBACK_PTR(void, SETSTR)(void *pv, CxInt64 index, const char * // As such, it is critical that this class NOT have a vtable, so virtual functions are illegal! class CLASS_ALIGN EnvironmentBlock { - // Fields that are visible to managed code come first and do not start with an underscore. - // Fields that are only visible to this code start with an underscore. + // Fields that are visible to managed code come first and do not start with an underscore. 
+ // Fields that are only visible to this code start with an underscore. private: - // *** These fields are known by managed code. It is critical that this struct not have a vtable. - // It is also critical that the layout of this prefix NOT vary from release to release or build to build. - // The managed code assumes that each pointer occupies 8 bytes. + // *** These fields are known by managed code. It is critical that this struct not have a vtable. + // It is also critical that the layout of this prefix NOT vary from release to release or build to build. + // The managed code assumes that each pointer occupies 8 bytes. - // Indicates a verbosity level. Zero means default (minimal). Larger generally means more information. - int verbosity; + // Indicates a verbosity level. Zero means default (minimal). Larger generally means more information. + int verbosity; - // The random seed. - int seed; + // The random seed. + int seed; - // The message sink. - MESSAGESINK messageSink; + // The message sink. + MESSAGESINK messageSink; - // The data sink. - DATASINK dataSink; + // The data sink. + DATASINK dataSink; - // The model sink. - MODELSINK modelSink; + // The model sink. + MODELSINK modelSink; - // Indicates max threads allowed. Less than one means default (maximal). - int maxThreadsAllowed; + // Max slots to return for vector valued columns(<=0 to return all). + int maxSlots; - // Check cancellation flag. - CHECKCANCEL checkCancel; + // Check cancellation flag. + CHECKCANCEL checkCancel; - // Path to python executable - const char* pythonPath; + // Path to python executable + const char* pythonPath; public: - EnvironmentBlock(int verbosity = 0, int maxThreadsAllowed = 0, int seed = 42, const char* pythonPath = NULL); - ~EnvironmentBlock(); - PyErrorCode GetErrorCode() { return _errCode; } - std::string GetErrorMessage() { return _errMessage; } - bp::dict GetData(); + EnvironmentBlock(int verbosity = 0, int maxSlots = -1, int seed = 42, const char* pythonPath = NULL); + ~EnvironmentBlock(); + std::string GetErrorMessage() { return _errMessage; } + bp::dict GetData(); private: - static MANAGED_CALLBACK(void) DataSink(EnvironmentBlock *penv, const DataViewBlock *pdata, void **&setters, void *&keyValueSetter); - static MANAGED_CALLBACK(void) MessageSink(EnvironmentBlock *penv, MessageKind kind, const char *sender, const char *message); - static MANAGED_CALLBACK(void) ModelSink(EnvironmentBlock *penv, const unsigned char *pBinaryModel, size_t iModelLen); - static MANAGED_CALLBACK(bool) CheckCancel(); + static MANAGED_CALLBACK(void) DataSink(EnvironmentBlock *penv, const DataViewBlock *pdata, void **&setters, void *&keyValueSetter); + static MANAGED_CALLBACK(void) MessageSink(EnvironmentBlock *penv, MessageKind kind, const char *sender, const char *message); + static MANAGED_CALLBACK(void) ModelSink(EnvironmentBlock *penv, const unsigned char *pBinaryModel, size_t iModelLen); + static MANAGED_CALLBACK(bool) CheckCancel(); private: - void DataSinkCore(const DataViewBlock * pdata); + void DataSinkCore(const DataViewBlock * pdata); private: - // This has a bit set for each kind of message that is desired. - int _kindMask; - // Fields used by the data callbacks. These keep the appropriate memory alive during the data operations. 
- int _irowBase; - int _crowWant; - std::vector _vset; - PyErrorCode _errCode; - std::string _errMessage; - - std::vector _names; - std::vector _columns; - // Maps between the column index, and the index in _vKeyValues containing the key names, or -1 if - // there are no key names. - std::vector _columnToKeyMap; - - std::vector*> _vKeyValues; - - static MANAGED_CALLBACK(void) SetR4(EnvironmentBlock *env, int col, long index, float value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetR8(EnvironmentBlock *env, int col, long index, double value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetBL(EnvironmentBlock *env, int col, long index, signed char value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - if (value < 0) - env->_columns[col]->SetKind(-1); - } - static MANAGED_CALLBACK(void) SetI1(EnvironmentBlock *env, int col, long index, signed char value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetI2(EnvironmentBlock *env, int col, long index, short value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetI4(EnvironmentBlock *env, int col, long index, int value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetI8(EnvironmentBlock *env, int col, long index, CxInt64 value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetU1(EnvironmentBlock *env, int col, long index, unsigned char value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetU2(EnvironmentBlock *env, int col, long index, unsigned short value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetU4(EnvironmentBlock *env, int col, long index, unsigned int value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetU8(EnvironmentBlock *env, int col, long index, CxUInt64 value) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, value); - } - static MANAGED_CALLBACK(void) SetTX(EnvironmentBlock *env, int col, long index, char* value, long length) - { - PythonObject* colObject = dynamic_cast*>(env->_columns[col]); - assert(colObject != nullptr); - colObject->SetAt(index, 0, std::string(value, length)); - } - static MANAGED_CALLBACK(void) SetKeyValue(EnvironmentBlock *env, int keyColumnIndex, int keyCode, char* value, long length) - { - assert(keyColumnIndex < env->_vKeyValues.size()); - PythonObject* keyNamesObject = env->_vKeyValues[keyColumnIndex]; 
- keyNamesObject->SetAt(keyCode, 0, std::string(value, length)); - } + // This has a bit set for each kind of message that is desired. + int _kindMask; + // Fields used by the data callbacks. These keep the appropriate memory alive during the data operations. + int _irowBase; + int _crowWant; + std::vector _vset; + std::string _errMessage; + + // Column names. + std::vector _names; + std::vector _columns; + + // Set of all key column indexes. + std::unordered_set _columnToKeyMap; + std::vector*> _vKeyValues; + + static MANAGED_CALLBACK(void) SetR4(EnvironmentBlock *env, int col, long m, long n, float value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetR8(EnvironmentBlock *env, int col, long m, long n, double value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetBL(EnvironmentBlock *env, int col, long m, long n, signed char value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + if (value < 0) + env->_columns[col]->SetKind(-1); + } + static MANAGED_CALLBACK(void) SetI1(EnvironmentBlock *env, int col, long m, long n, signed char value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetI2(EnvironmentBlock *env, int col, long m, long n, short value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetI4(EnvironmentBlock *env, int col, long m, long n, int value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetI8(EnvironmentBlock *env, int col, long m, long n, CxInt64 value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetU1(EnvironmentBlock *env, int col, long m, long n, unsigned char value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetU2(EnvironmentBlock *env, int col, long m, long n, unsigned short value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetU4(EnvironmentBlock *env, int col, long m, long n, unsigned int value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetU8(EnvironmentBlock *env, int col, long m, long n, CxUInt64 value) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, value); + } + static MANAGED_CALLBACK(void) SetTX(EnvironmentBlock *env, int col, long m, long n, char* value, long length) + { + PyColumn* colObject = dynamic_cast*>(env->_columns[col]); + assert(colObject != nullptr); + colObject->SetAt(m, n, std::string(value, length)); + } + static MANAGED_CALLBACK(void) SetKeyValue(EnvironmentBlock *env, int keyColumnIndex, int keyCode, char* value, long length) + 
{ + assert(keyColumnIndex < env->_vKeyValues.size()); + PyColumn* keyNamesObject = env->_vKeyValues[keyColumnIndex]; + keyNamesObject->SetAt(keyCode, 0, std::string(value, length)); + } }; @@ -208,32 +200,32 @@ class CLASS_ALIGN EnvironmentBlock inline void FillDead(int& x) { - assert(sizeof(int) == 4); - x = BAD_QUAD; + assert(sizeof(int) == 4); + x = BAD_QUAD; } inline void FillDead(CxInt64& x) { - assert(sizeof(CxInt64) == 8); - assert(sizeof(int) == 4); - ((int *)&x)[0] = BAD_QUAD; - ((int *)&x)[1] = BAD_QUAD; + assert(sizeof(CxInt64) == 8); + assert(sizeof(int) == 4); + ((int *)&x)[0] = BAD_QUAD; + ((int *)&x)[1] = BAD_QUAD; } template inline void FillDead(T*& x) { - assert(sizeof(T*) == 8); - assert(sizeof(int) == 4); - ((int *)&x)[0] = BAD_QUAD; - ((int *)&x)[1] = BAD_QUAD; + assert(sizeof(T*) == 8); + assert(sizeof(int) == 4); + ((int *)&x)[0] = BAD_QUAD; + ((int *)&x)[1] = BAD_QUAD; } struct MlNetExecutionError : std::exception { - MlNetExecutionError(const char *message) : msg_(message) { } - virtual char const *what() const noexcept { return msg_.c_str(); } + MlNetExecutionError(const char *message) : msg_(message) { } + virtual char const *what() const noexcept { return msg_.c_str(); } private: - std::string msg_; + std::string msg_; }; diff --git a/src/NativeBridge/PythonInterop.cpp b/src/NativeBridge/PythonInterop.cpp index c0a833b7..f2d4ee87 100644 --- a/src/NativeBridge/PythonInterop.cpp +++ b/src/NativeBridge/PythonInterop.cpp @@ -1,55 +1,339 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. +#include #include "stdafx.h" #include "PythonInterop.h" -PythonObjectBase::PythonObjectBase(const int& kind) + +inline void destroyManagerCObject(PyObject* obj) { + auto* b = static_cast(PyCapsule_GetPointer(obj, NULL)); + if (b) { delete b; } +} + + +PyColumnBase::PyColumnBase(const int& kind) { - _kind = kind; + _kind = kind; } -PythonObjectBase::~PythonObjectBase() +PyColumnBase::~PyColumnBase() { } -PythonObjectBase::creation_map* PythonObjectBase::m_pCreationMap = PythonObjectBase::CreateMap(); +PyColumnBase::creation_map* PyColumnBase::m_pSingleCreationMap = PyColumnBase::CreateSingleMap(); +PyColumnBase::creation_map* PyColumnBase::m_pVariableCreationMap = PyColumnBase::CreateVariableMap(); + +PyColumnBase::creation_map* PyColumnBase::CreateSingleMap() +{ + PyColumnBase::creation_map* map = new PyColumnBase::creation_map(); + + map->insert(creation_map_entry(BL, CreateSingle)); + map->insert(creation_map_entry(I1, CreateSingle)); + map->insert(creation_map_entry(I2, CreateSingle)); + map->insert(creation_map_entry(I4, CreateSingle)); + map->insert(creation_map_entry(I8, CreateSingle)); + map->insert(creation_map_entry(U1, CreateSingle)); + map->insert(creation_map_entry(U2, CreateSingle)); + map->insert(creation_map_entry(U4, CreateSingle)); + map->insert(creation_map_entry(U8, CreateSingle)); + map->insert(creation_map_entry(R4, CreateSingle)); + map->insert(creation_map_entry(R8, CreateSingle)); + map->insert(creation_map_entry(TX, CreateSingle)); + map->insert(creation_map_entry(DT, CreateSingle)); + return map; +} + +PyColumnBase::creation_map* PyColumnBase::CreateVariableMap() +{ + PyColumnBase::creation_map* map = new PyColumnBase::creation_map(); + + map->insert(creation_map_entry(BL, CreateVariable)); + map->insert(creation_map_entry(I1, CreateVariable)); + map->insert(creation_map_entry(I2, CreateVariable)); + map->insert(creation_map_entry(I4, CreateVariable)); + map->insert(creation_map_entry(I8, CreateVariable)); + 
map->insert(creation_map_entry(U1, CreateVariable)); + map->insert(creation_map_entry(U2, CreateVariable)); + map->insert(creation_map_entry(U4, CreateVariable)); + map->insert(creation_map_entry(U8, CreateVariable)); + map->insert(creation_map_entry(R4, CreateVariable)); + map->insert(creation_map_entry(R8, CreateVariable)); + map->insert(creation_map_entry(TX, CreateVariable)); + return map; +} + +PyColumnBase* PyColumnBase::Create(const int& kind, size_t numRows, size_t numCols) +{ + if (numCols == 0) + { + creation_map::iterator found = m_pVariableCreationMap->find(kind); + if (found != m_pVariableCreationMap->end()) + return found->second(kind, numRows); + } + else + { + creation_map::iterator found = m_pSingleCreationMap->find(kind); + if (found != m_pSingleCreationMap->end()) + return found->second(kind, numRows); + } + + std::stringstream message; + message << "Columns of kind " << kind << " are not supported."; + throw std::invalid_argument(message.str().c_str()); +} -PythonObjectBase::creation_map* PythonObjectBase::CreateMap() +template PyColumnBase* PyColumnBase::CreateSingle(const int& kind, size_t nRows) { - PythonObjectBase::creation_map* map = new PythonObjectBase::creation_map(); + return new PyColumnSingle(kind, nRows); +} - map->insert(creation_map_entry(BL, CreateObject)); - map->insert(creation_map_entry(I1, CreateObject)); - map->insert(creation_map_entry(I2, CreateObject)); - map->insert(creation_map_entry(I4, CreateObject)); - map->insert(creation_map_entry(I8, CreateObject)); - map->insert(creation_map_entry(U1, CreateObject)); - map->insert(creation_map_entry(U2, CreateObject)); - map->insert(creation_map_entry(U4, CreateObject)); - map->insert(creation_map_entry(U8, CreateObject)); - map->insert(creation_map_entry(R4, CreateObject)); - map->insert(creation_map_entry(R8, CreateObject)); - map->insert(creation_map_entry(TX, CreateObject)); - return map; +template PyColumnBase* PyColumnBase::CreateVariable(const int& kind, size_t nRows) +{ + return new PyColumnVariable(kind, nRows); } -PythonObjectBase* PythonObjectBase::CreateObject(const int& kind, size_t numRows, size_t numCols) +template +void PyColumnSingle::AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows) { - creation_map::iterator found = m_pCreationMap->find(kind); + auto* data = _pData->data(); - if (found == m_pCreationMap->end()) - { - std::stringstream message; - message << "Columns of kind " << kind << " are not supported."; - throw std::invalid_argument(message.str().c_str()); - } + switch (this->_kind) + { + case DataKind::BL: + { + bp::handle<> h(::PyCapsule_New((void*)this, NULL, (PyCapsule_Destructor)&destroyManagerCObject)); + dict[name] = np::from_data( + data, + np::dtype::get_builtin(), + bp::make_tuple(_pData->size()), + bp::make_tuple(sizeof(bool)), bp::object(h)); + } + break; + case DataKind::I1: + case DataKind::I2: + case DataKind::I4: + { + bp::handle<> h(::PyCapsule_New((void*)this, NULL, (PyCapsule_Destructor)&destroyManagerCObject)); + np::ndarray npdata = np::from_data( + data, + np::dtype::get_builtin(), + bp::make_tuple(_pData->size()), + bp::make_tuple(sizeof(float)), bp::object(h)); + if (keyNames == nullptr) + { + dict[name] = npdata; + } + else + { + dict[name] = bp::dict(); + dict[name]["..Data"] = npdata; + bp::list list; + for (int j = 0; j < keyNames->size(); j++) + { + bp::object obj; + const std::string& value = keyNames->at(j); + if (!value.empty()) + { + obj = bp::object(value); + } + list.append(obj); + } 
+ dict[name]["..KeyValues"] = list; + } + } + break; + case DataKind::I8: + case DataKind::U1: + case DataKind::U2: + case DataKind::U4: + case DataKind::U8: + case DataKind::R4: + case DataKind::R8: + { + bp::handle<> h(::PyCapsule_New((void*)this, NULL, (PyCapsule_Destructor)&destroyManagerCObject)); + dict[name] = np::from_data( + data, + np::dtype::get_builtin(), + bp::make_tuple(_pData->size()), + bp::make_tuple(sizeof(T)), bp::object(h)); + } + break; + case DataKind::DT: + { + bp::handle<> h(::PyCapsule_New((void*)this, NULL, (PyCapsule_Destructor)&destroyManagerCObject)); + np::ndarray npdata = np::from_data( + data, + np::dtype::get_builtin(), + bp::make_tuple(_pData->size()), + bp::make_tuple(sizeof(T)), bp::object(h)); - return found->second(kind, numRows, numCols); + dict[name] = bp::dict(); + dict[name]["..DateTime"] = npdata; + } + break; + } } -template PythonObjectBase* PythonObjectBase::CreateObject(const int& kind, size_t nRows, size_t nColumns) +template <> +void PyColumnSingle::AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows) { - return new PythonObject(kind, nRows, nColumns); + bp::list list; + for (size_t i = 0; i < _pData->size(); i++) + { + bp::object obj; + const std::string& value = _pData->at(i); + if (!value.empty()) + { + obj = bp::object(value); + } + list.append(obj); + } + dict[name] = list; } +template +void PyColumnVariable::SetAt(size_t nRow, size_t nCol, const T& value) +{ + if ((nRow + 1) > _numRows) _numRows = nRow + 1; + + /* + * Make sure there are enough columns for the request. + */ + for (size_t i = _data.size(); i <= nCol; i++) + { + _data.push_back(new std::vector()); + } + + std::vector* pColData = _data[nCol]; + + /* + * Fill in any missing row values. + */ + for (size_t i = pColData->size(); i < nRow; i++) + { + pColData->push_back(GetMissingValue()); + } + + pColData->push_back(GetConvertedValue(value)); +} + +/* + * Note: an instance of this object should not be used + * and should be considered invalid after the first time + * this method has been called. + */ +template +void PyColumnVariable::Deleter(PyObject* obj) +{ + auto* deleteData = static_cast::DeleteData*>(PyCapsule_GetPointer(obj, NULL)); + + PyColumnVariable* instance = deleteData->instance; + size_t column = deleteData->column; + + std::vector* data = instance->_data[column]; + if (data != nullptr) + { + instance->_data[column] = nullptr; + instance->_numDeletedColumns++; + delete data; + + if (instance->_numDeletedColumns == instance->_data.size()) + { + delete instance; + } + } +} + +template +void PyColumnVariable::AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows) +{ + size_t numRows = (expectedRows > _numRows) ? expectedRows : _numRows; + size_t numCols = _data.size(); + + if (numCols == 0) + { + /* + * If there were no values set then create a + * column so it can be filled with missing values. + */ + _data.push_back(new std::vector()); + numCols = 1; + } + + const std::string colNameBase = name + "."; + int maxDigits = (int)ceil(log10(numCols)); + if (maxDigits == 0) maxDigits = 1; + + for (size_t i = 0; i < numCols; i++) + { + std::vector* pColData = _data[i]; + + /* + * Make sure all the columns are the same length. 
+ */ + for (size_t j = pColData->size(); j < numRows; j++) + { + pColData->push_back(GetMissingValue()); + } + + std::string colName = std::to_string(i); + colName = std::string(maxDigits - colName.length(), '0') + colName; + colName = colNameBase + colName; + + AddColumnToDict(dict, colName, i); + } +} + +template +void PyColumnVariable::AddColumnToDict(bp::dict& dict, + const std::string& name, + size_t index) +{ + auto* data = _data[index]->data(); + + DeleteData* deleteData = new DeleteData(); + deleteData->instance = this; + deleteData->column = index; + + bp::handle<> h(::PyCapsule_New((void*)deleteData, NULL, (PyCapsule_Destructor)&Deleter)); + dict[name] = np::from_data( + data, + np::dtype::get_builtin(), + bp::make_tuple(_data[index]->size()), + bp::make_tuple(sizeof(T2)), bp::object(h)); +} + +template<> +void PyColumnVariable::AddColumnToDict(bp::dict& dict, + const std::string& name, + size_t index) +{ + bp::list list; + std::vector* pColData = _data[index]; + size_t numRows = pColData->size(); + + for (size_t i = 0; i < numRows; i++) + { + bp::object obj; + NullableString value = pColData->at(i); + + if (value) + { + obj = bp::object(*value); + } + + list.append(obj); + } + + dict[name] = list; +} diff --git a/src/NativeBridge/PythonInterop.h b/src/NativeBridge/PythonInterop.h index 9654476a..8929ae39 100644 --- a/src/NativeBridge/PythonInterop.h +++ b/src/NativeBridge/PythonInterop.h @@ -2,107 +2,229 @@ // Licensed under the MIT license. #pragma once - #include +#include +#include + // Taken from ML.NET source code. These values should be stable. enum DataKind { - I1 = 1, - U1 = 2, - I2 = 3, - U2 = 4, - I4 = 5, - U4 = 6, - I8 = 7, - U8 = 8, - R4 = 9, - R8 = 10, - TX = 11, - BL = 12, - TS = 13, - DT = 14, - DZ = 15, + I1 = 1, + U1 = 2, + I2 = 3, + U2 = 4, + I4 = 5, + U4 = 6, + I8 = 7, + U8 = 8, + R4 = 9, + R8 = 10, + TX = 11, + BL = 12, + TS = 13, + DT = 14, + DZ = 15, }; -class PythonObjectBase +class PyColumnBase { private: - typedef std::map creation_map; - typedef std::pair creation_map_entry; + typedef std::map creation_map; + typedef std::pair creation_map_entry; - static creation_map* m_pCreationMap; - static creation_map* CreateMap(); + static creation_map* m_pSingleCreationMap; + static creation_map* CreateSingleMap(); - template static PythonObjectBase* CreateObject(const int& name, size_t nRows, size_t nColumns); + static creation_map* m_pVariableCreationMap; + static creation_map* CreateVariableMap(); + + template static PyColumnBase* CreateSingle(const int& kind, size_t nRows); + template static PyColumnBase* CreateVariable(const int& kind, size_t nRows); protected: - int _kind; + int _kind; public: - PythonObjectBase(const int& kind); - static PythonObjectBase* CreateObject(const int& kind, size_t numRows, size_t numCols); - const int& GetKind() const; - void SetKind(int kind); - virtual ~PythonObjectBase(); + static PyColumnBase* Create(const int& kind, size_t numRows, size_t numCols); + + PyColumnBase(const int& kind); + virtual ~PyColumnBase(); + + const int& GetKind() const { return _kind; } + void SetKind(int kind) { _kind = kind; } + + virtual size_t GetNumRows() = 0; + virtual size_t GetNumCols() = 0; }; -inline const int& PythonObjectBase::GetKind() const -{ - return _kind; -} -inline void PythonObjectBase::SetKind(int kind) +/* + * Template typed abstract base class which provides + * the required interface for all derived classes. 
+ */ +template +class PyColumn : public PyColumnBase { - _kind = kind; -} +public: + PyColumn(const int& kind) : PyColumnBase(kind) {} + virtual ~PyColumn() {} + virtual void SetAt(size_t nRow, size_t nCol, const T& value) = 0; + virtual void AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows) = 0; +}; +/* + * Handles the single value case. + */ template -class PythonObject : public PythonObjectBase +class PyColumnSingle : public PyColumn { protected: - std::vector* _pData; - - size_t _numRows; - size_t _numCols; + std::vector* _pData; public: - PythonObject(const int& kind, size_t numRows = 1, size_t numCols = 1); - virtual ~PythonObject(); - void SetAt(size_t nRow, size_t nCol, const T& value); - const std::vector* GetData() const; + PyColumnSingle(const int& kind, size_t numRows = 0); + virtual ~PyColumnSingle(); + virtual void SetAt(size_t nRow, size_t nCol, const T& value); + virtual void AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows); + virtual size_t GetNumRows(); + virtual size_t GetNumCols(); + const std::vector* GetData() const { return _pData; } }; template -inline PythonObject::PythonObject(const int& kind, size_t numRows, size_t numCols) - : PythonObjectBase(kind) +inline PyColumnSingle::PyColumnSingle(const int& kind, size_t numRows) + : PyColumn(kind) { - _numRows = numRows; - _numCols = numCols; + _pData = new std::vector(); + if (numRows > 0) { + _pData->reserve(numRows); + } +} - _pData = new std::vector(); - if (_numRows > 0) - _pData->reserve(_numRows*_numCols); +template +inline PyColumnSingle::~PyColumnSingle() +{ + delete _pData; } template -inline PythonObject::~PythonObject() +inline void PyColumnSingle::SetAt(size_t nRow, size_t nCol, const T& value) { - delete _pData; + if (_pData->size() <= nRow) + _pData->resize(nRow + 1); + _pData->at(nRow) = value; } template -inline void PythonObject::SetAt(size_t nRow, size_t nCol, const T& value) +inline size_t PyColumnSingle::GetNumRows() { - size_t index = nRow*_numCols + nCol; - if (_pData->size() <= index) - _pData->resize(index + 1); - _pData->at(index) = value; + return _pData->size(); } template -inline const std::vector* PythonObject::GetData() const +inline size_t PyColumnSingle::GetNumCols() +{ + return 1; +} + + +typedef boost::optional NullableString; + +/* + * Handles the variable value case. 
+ */ +template +class PyColumnVariable : public PyColumn { - return _pData; -} \ No newline at end of file +private: + std::vector*> _data; + + size_t _numRows; + size_t _numDeletedColumns; + +public: + PyColumnVariable(const int& kind, size_t numRows = 0); + virtual ~PyColumnVariable(); + virtual void SetAt(size_t nRow, size_t nCol, const T& value); + virtual void AddToDict(bp::dict& dict, + const std::string& name, + const std::vector* keyNames, + const size_t expectedRows); + virtual size_t GetNumRows(); + virtual size_t GetNumCols(); + + T2 GetMissingValue(); + T2 GetConvertedValue(const T& value); + + void AddColumnToDict(bp::dict& dict, const std::string& name, size_t index); + +public: + typedef struct + { + PyColumnVariable* instance; + size_t column; + } DeleteData; + + static void Deleter(PyObject* obj); +}; + +template +inline PyColumnVariable::PyColumnVariable(const int& kind, size_t numRows) + : PyColumn(kind), + _numRows(numRows), + _numDeletedColumns(0) +{ +} + +template +inline PyColumnVariable::~PyColumnVariable() +{ + for (unsigned int i = 0; i < _data.size(); i++) + { + if (_data[i] != nullptr) delete _data[i]; + } +} + +template +inline size_t PyColumnVariable::GetNumRows() +{ + return _numRows; +} + +template +inline size_t PyColumnVariable::GetNumCols() +{ + return _data.size(); +} + +template +inline T2 PyColumnVariable::GetMissingValue() +{ + return NAN; +} + +template +inline T2 PyColumnVariable::GetConvertedValue(const T& value) +{ + return (T2)value; +} + +template <> +inline NullableString PyColumnVariable::GetMissingValue() +{ + return boost::none; +} + +template <> +inline NullableString PyColumnVariable::GetConvertedValue(const std::string& value) +{ + return value; +} diff --git a/src/NativeBridge/UnixInterface.h b/src/NativeBridge/UnixInterface.h index 0a7c1155..bb2c7fd5 100644 --- a/src/NativeBridge/UnixInterface.h +++ b/src/NativeBridge/UnixInterface.h @@ -24,7 +24,7 @@ #define CORECLR_SHUTDOWN "coreclr_shutdown" #define DOTNETBRIDGE "DotNetBridge" -#define DOTNETBRIDGE_FQDN "Microsoft.MachineLearning.DotNetBridge.Bridge" +#define DOTNETBRIDGE_FQDN "Microsoft.ML.DotNetBridge.Bridge" #define GET_FN "GetFn" @@ -151,7 +151,7 @@ class UnixMlNetInterface std::string libsroot(mlnetpath); std::string coreclrdir(coreclrpath); - if (strlen(dpreppath) == 0) + if (strlen(dpreppath) == 0) { dpreppath = mlnetpath; } diff --git a/src/NativeBridge/WinInterface.h b/src/NativeBridge/WinInterface.h index 4f5238db..3548b578 100644 --- a/src/NativeBridge/WinInterface.h +++ b/src/NativeBridge/WinInterface.h @@ -302,7 +302,7 @@ class WinMlNetInterface HRESULT hr = host->CreateDelegate( _domainId, W("DotNetBridge"), - W("Microsoft.MachineLearning.DotNetBridge.Bridge"), + W("Microsoft.ML.DotNetBridge.Bridge"), W("GetFn"), &getter); if (FAILED(hr)) diff --git a/src/NativeBridge/dllmain.cpp b/src/NativeBridge/dllmain.cpp index 0dafd696..1a7a297d 100644 --- a/src/NativeBridge/dllmain.cpp +++ b/src/NativeBridge/dllmain.cpp @@ -7,6 +7,7 @@ #include "ManagedInterop.h" #define PARAM_SEED "seed" +#define PARAM_MAX_SLOTS "max_slots" #define PARAM_GRAPH "graph" #define PARAM_VERBOSE "verbose" #define PARAM_MLNET_PATH "mlnetPath" @@ -75,15 +76,15 @@ bp::dict pxCall(bp::dict& params) bp::extract mlnetPath(params[PARAM_MLNET_PATH]); bp::extract dotnetClrPath(params[PARAM_DOTNETCLR_PATH]); bp::extract dprepPath(params[PARAM_DPREP_PATH]); - bp::extract pythonPath(params[PARAM_PYTHON_PATH]); - bp::extract verbose(params[PARAM_VERBOSE]); + bp::extract pythonPath(params[PARAM_PYTHON_PATH]); + 
bp::extract verbose(params[PARAM_VERBOSE]); std::int32_t i_verbose = std::int32_t(verbose); std::string s_mlnetPath = std::string(mlnetPath); std::string s_dotnetClrPath = std::string(dotnetClrPath); std::string s_dprepPath = std::string(dprepPath); std::string s_pythonPath = std::string(pythonPath); - std::string s_graph = std::string(graph); - const char *mlnetpath = s_mlnetPath.c_str(); + std::string s_graph = std::string(graph); + const char *mlnetpath = s_mlnetPath.c_str(); const char *coreclrpath = s_dotnetClrPath.c_str(); const char *dpreppath = s_dprepPath.c_str(); @@ -96,7 +97,11 @@ bp::dict pxCall(bp::dict& params) if (params.has_key(PARAM_SEED)) seed = bp::extract(params[PARAM_SEED]); - EnvironmentBlock env(i_verbose, 0, seed, s_pythonPath.c_str()); + int maxSlots = -1; + if (params.has_key(PARAM_MAX_SLOTS)) + maxSlots = bp::extract(params[PARAM_MAX_SLOTS]); + + EnvironmentBlock env(i_verbose, maxSlots, seed, s_pythonPath.c_str()); int retCode; if (params.has_key(PARAM_DATA) && bp::extract(params[PARAM_DATA]).check()) { @@ -112,8 +117,7 @@ bp::dict pxCall(bp::dict& params) res = env.GetData(); if (retCode == -1) - // REVIEW: get the content of IChannel and add it the the error message. - throw std::runtime_error("Returned code is -1. Check the log for error messages."); + throw std::runtime_error(env.GetErrorMessage()); } catch (const std::exception& e) { diff --git a/src/NativeBridge/stdafx.h b/src/NativeBridge/stdafx.h index f5fe57f1..91c2f2fb 100644 --- a/src/NativeBridge/stdafx.h +++ b/src/NativeBridge/stdafx.h @@ -81,7 +81,7 @@ class StopWatch ~StopWatch() { auto endTime = std::chrono::high_resolution_clock::now(); - + std::stringstream buffer; buffer << m_description << ":" << ((endTime - m_startTime).count() / 1000000) << " msecs" << std::endl; diff --git a/src/Platforms/build.csproj b/src/Platforms/build.csproj index 75fa806f..3db67054 100644 --- a/src/Platforms/build.csproj +++ b/src/Platforms/build.csproj @@ -11,19 +11,19 @@ - - - - - - - - - - - + + + + + + + + + + + - + diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index a0ac2115..df43c116 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -67,9 +67,7 @@ - - Code - + @@ -93,6 +91,9 @@ + + + @@ -102,6 +103,7 @@ + @@ -120,6 +122,7 @@ + @@ -172,11 +175,14 @@ + + + @@ -295,11 +301,15 @@ + + + + @@ -337,6 +347,7 @@ + @@ -384,6 +395,8 @@ + + @@ -421,9 +434,11 @@ + + @@ -442,6 +457,7 @@ + @@ -613,6 +629,7 @@ + @@ -626,9 +643,11 @@ + + @@ -637,11 +656,13 @@ + + @@ -665,9 +686,20 @@ + + + + + + + + + + + diff --git a/src/python/nimbusml/__init__.py b/src/python/nimbusml/__init__.py index 3abbc2ef..55a92107 100644 --- a/src/python/nimbusml/__init__.py +++ b/src/python/nimbusml/__init__.py @@ -2,7 +2,7 @@ Microsoft Machine Learning for Python """ -__version__ = '1.3.1' +__version__ = '1.5.0' # CoreCLR version of MicrosoftML is built on Windows. # But file permissions are not preserved when it's copied to Linux. 
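For orientation, the `pxCall` changes above read the new `PARAM_MAX_SLOTS` ("max_slots") entry from the same flat params dict as the existing `"graph"`, `"verbose"`, and `"seed"` keys, and forward it to `EnvironmentBlock`. Below is a minimal sketch of such a dict from the Python side; the placeholder values are assumptions, and the remaining path keys the bridge expects are omitted.

```python
# Illustrative only: placeholder values; the library builds this dict internally.
bridge_params = {
    "graph": '{"nodes": []}',      # serialized entry-point graph (PARAM_GRAPH)
    "verbose": 1,                  # controls the message-kind mask (PARAM_VERBOSE)
    "seed": 42,                    # random seed (PARAM_SEED)
    "max_slots": 1000,             # new PARAM_MAX_SLOTS: cap slots returned for vector
                                   # columns; <= 0 (the default -1) returns all slots
    "mlnetPath": "/path/to/libs",  # PARAM_MLNET_PATH; other path keys omitted here
}
```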
diff --git a/src/python/nimbusml/_pipeline.py b/src/python/nimbusml/_pipeline.py index 692e1dea..3e0dce27 100644 --- a/src/python/nimbusml/_pipeline.py +++ b/src/python/nimbusml/_pipeline.py @@ -19,6 +19,7 @@ from scipy.sparse import csr_matrix from sklearn.utils.validation import check_X_y, check_array from sklearn.utils.multiclass import unique_labels +from zipfile import ZipFile from .internal.core.base_pipeline_item import BasePipelineItem from .internal.entrypoints.data_customtextloader import \ @@ -38,8 +39,11 @@ from .internal.entrypoints.models_regressionevaluator import \ models_regressionevaluator from .internal.entrypoints.models_summarizer import models_summarizer -from .internal.entrypoints.transforms_datasetscorer import \ - transforms_datasetscorer +from .internal.entrypoints.models_schema import models_schema +from .internal.entrypoints.transforms_datasetscorerex import \ + transforms_datasetscorerex +from .internal.entrypoints.transforms_datasettransformscorer import \ + transforms_datasettransformscorer from .internal.entrypoints.transforms_featurecombiner import \ transforms_featurecombiner from .internal.entrypoints.transforms_featurecontributioncalculationtransformer import \ @@ -56,6 +60,8 @@ transforms_modelcombiner from .internal.entrypoints.transforms_optionalcolumncreator import \ transforms_optionalcolumncreator +from .internal.entrypoints.transforms_permutationfeatureimportance import \ + transforms_permutationfeatureimportance from .internal.entrypoints \ .transforms_predictedlabelcolumnoriginalvalueconverter import \ transforms_predictedlabelcolumnoriginalvalueconverter @@ -67,7 +73,7 @@ from .internal.utils.data_schema import DataSchema from .internal.utils.data_stream import DataStream, ViewDataStream, \ FileDataStream, BinaryDataStream -from .internal.utils.entrypoints import Graph +from .internal.utils.entrypoints import Graph, DataOutputFormat from .internal.utils.schema_helper import _extract_label_column from .internal.utils.utils import trace, unlist @@ -275,7 +281,7 @@ def nodes(self): @property def last_node(self): - if len(self.steps) <= 0: + if not self.steps: raise TypeError("No steps given.") last_step = self.steps[-1] return last_step if not isinstance(last_step, tuple) else \ @@ -561,9 +567,12 @@ def _init_graph_nodes( inputs = OrderedDict([(file_data.replace('$', ''), '')]) # connect transform node inputs/outputs - if feature_columns is None and not isinstance(X, BinaryDataStream): + if feature_columns is None: if schema is None: - schema = DataSchema.read_schema(X) + if isinstance(X, BinaryDataStream): + schema = X.schema + else: + schema = DataSchema.read_schema(X) feature_columns = [c.Name for c in schema] if label_column: # if label_column is a string, remove it from @@ -596,6 +605,14 @@ def _init_graph_nodes( output_data=output_data, output_model=output_model, strategy_iosklearn=strategy_iosklearn) + + for node in enumerate([n for n in transform_nodes + if n.name == 'Models.DatasetTransformer']): + input_name = 'dataset_transformer_model' + str(node[0]) + inputs[input_name] = node[1].inputs['TransformModel'] + node[1].inputs['TransformModel'] = '$' + input_name + node[1].input_variables.add(node[1].inputs['TransformModel']) + graph_nodes['transform_nodes'] = transform_nodes return graph_nodes, feature_columns, inputs, transform_nodes, \ columns_out @@ -644,8 +661,7 @@ def _update_graph_nodes_for_learner( else: raise NotImplementedError( "Strategy '{0}' to handle unspecified inputs is not " - "implemented".format( - strategy_iosklearn)) + 
"implemented".format(strategy_iosklearn)) if label_column is not None or last_node._use_role(Role.Label): if getattr(last_node, 'label_column_name_', None): @@ -668,8 +684,7 @@ def _update_graph_nodes_for_learner( last_node.label_column_name = None label_column = None - if weight_column is not None or last_node._use_role( - Role.Weight): + if weight_column is not None or last_node._use_role(Role.Weight): if getattr(last_node, 'example_weight_column_name', None): weight_column = last_node.example_weight_column_name elif weight_column: @@ -681,8 +696,7 @@ def _update_graph_nodes_for_learner( if (hasattr(last_node, 'row_group_column_name_') and last_node.row_group_column_name_ is not None): group_id_column = last_node.row_group_column_name_ - elif (hasattr(last_node, - 'row_group_column_name') and + elif (hasattr(last_node, 'row_group_column_name') and last_node.row_group_column_name is not None): group_id_column = last_node.row_group_column_name else: @@ -704,10 +718,8 @@ def _update_graph_nodes_for_learner( # todo: ideally all the nodes have the same name for params # so we dont have to distinguish if its learner or - # transformer. We will supply - # input_data, output_data & output_model vars. Its up to - # node to - # use suplied vars + # transformer. We will supply input_data, output_data and + # output_model vars. Its up to node to use suplied vars. learner_node = last_node._get_node( feature_column_name=learner_features, training_data=output_data, @@ -734,6 +746,7 @@ def _fit_graph(self, X, y, verbose, **params): output_binary_data_stream = params.pop( 'output_binary_data_stream', False) params.pop('parallel', None) + do_output_predictor_model = params.pop('output_predictor_model', None) X, y, columns_renamed, feature_columns, label_column, schema, \ weights, weight_column = self._preprocess_X_y(X, y, weights) @@ -748,6 +761,7 @@ def _fit_graph(self, X, y, verbose, **params): input_data = "$input_data" output_data = "$output_data" output_model = "$output_model" + output_predictor_model = "$output_predictor_model" predictor_model = "$predictor_model" graph_nodes, feature_columns, inputs, transform_nodes, \ @@ -762,10 +776,13 @@ def _fit_graph(self, X, y, verbose, **params): self._update_graph_nodes_for_learner( graph_nodes, transform_nodes, - columns_out, label_column, + columns_out, + label_column, weight_column, - output_data, output_model, - predictor_model, y, + output_data, + output_model, + predictor_model, + y, strategy_iosklearn=strategy_iosklearn) # graph_nodes contain graph sections, which is needed for CV. 
@@ -775,19 +792,33 @@ def _fit_graph(self, X, y, verbose, **params): graph_nodes = list(itertools.chain(*graph_nodes.values())) # combine output models - transform_models = [node.outputs["Model"] - for node in graph_nodes if - "Model" in node.outputs] - if learner_node and len( - transform_models) > 0: # no need to combine if there is - # only 1 model + transform_models = [] + for node in graph_nodes: + if node.name == 'Models.DatasetTransformer': + transform_models.append(node.inputs['TransformModel']) + elif "Model" in node.outputs: + transform_models.append(node.outputs["Model"]) + # no need to combine if there is only 1 model + if learner_node and len(transform_models) > 0: combine_model_node = transforms_manyheterogeneousmodelcombiner( transform_models=transform_models, - predictor_model=( - predictor_model if learner_node else None), + predictor_model=predictor_model, model=output_model) combine_model_node._implicit = True graph_nodes.append(combine_model_node) + if do_output_predictor_model: + # get implicit_nodes and build predictor model only + implicit_nodes = graph_sections['implicit_nodes'] + implicit_transform_models = [] + for node in implicit_nodes: + if "Model" in node.outputs: + implicit_transform_models.append(node.outputs["Model"]) + output_predictor_model_node = transforms_manyheterogeneousmodelcombiner( + transform_models=implicit_transform_models, + predictor_model=predictor_model, + model=output_predictor_model) + output_predictor_model_node._implicit = True + graph_nodes.append(output_predictor_model_node) elif len(transform_models) > 1: combine_model_node = transforms_modelcombiner( models=transform_models, @@ -796,12 +827,13 @@ def _fit_graph(self, X, y, verbose, **params): graph_nodes.append(combine_model_node) elif len(graph_nodes) == 0: raise RuntimeError( - "Unable to process the pipeline len(transform_models)={" - "0}.".format( - len(transform_models))) + "Unable to process the pipeline len(transform_models)={0}.". + format(len(transform_models))) # create the graph outputs = OrderedDict([(output_model.replace('$', ''), '')]) + if do_output_predictor_model: + outputs[output_predictor_model.replace('$', '')] = '' # REVIEW: ideally we should remove output completely from the # graph if its not needed # however graph validation logic prevents doing that at the moment, @@ -809,10 +841,18 @@ def _fit_graph(self, X, y, verbose, **params): if learner_node is None: # last node is transformer outputs[output_data.replace( '$', '')] = '' if do_fit_transform else '' + + data_output_format = DataOutputFormat.DF + if do_fit_transform: + if output_binary_data_stream: + data_output_format = DataOutputFormat.IDV + elif params.pop('as_csr', False): + data_output_format = DataOutputFormat.CSR + graph = Graph( inputs, outputs, - do_fit_transform and output_binary_data_stream, + data_output_format, *(graph_nodes)) # Checks that every parameter in params was used. @@ -1131,10 +1171,9 @@ def move_information_about_roles_once_used(): # run the graph # REVIEW: we should have the possibility to keep the model in - # memory - # and not in a file. + # memory and not in a file. 
try: - (out_model, out_data, out_metrics) = graph.run( + (out_model, out_data, out_metrics, out_predictor_model) = graph.run( X=X, y=y, random_state=self.random_state, @@ -1160,6 +1199,8 @@ def move_information_about_roles_once_used(): move_information_about_roles_once_used() self.graph_ = graph self.model = out_model + if out_predictor_model: + self.predictor_model = out_predictor_model self.data = out_data # stop the clock self._run_time = time.time() - start_time @@ -1371,9 +1412,7 @@ def _process_learner( optional_node = transforms_optionalcolumncreator( column=[label], data="$input_data" if num_transforms == 0 else - output_data + - str( - num_transforms), + output_data + str(num_transforms), output_data="$optional_data", model=output_model + str(num_transforms + 1)) optional_node._implicit = True @@ -1381,24 +1420,20 @@ def _process_learner( data="$optional_data", label_column=label, output_data="$label_data", - model=output_model + str( - num_transforms + 2)) + model=output_model + str(num_transforms + 2)) label_node._implicit = True feature_node = transforms_featurecombiner( data="$label_data", features=features, output_data=output_data, - model=output_model + str( - num_transforms + 3)) + model=output_model + str(num_transforms + 3)) feature_node._implicit = True implicit_nodes = [optional_node, label_node, feature_node] elif learner.type in ('classifier', 'ranker'): optional_node = transforms_optionalcolumncreator( column=[label], data="$input_data" if num_transforms == 0 else - output_data + - str( - num_transforms), + output_data + str(num_transforms), output_data="$optional_data", model=output_model + str(num_transforms + 1)) optional_node._implicit = True @@ -1409,25 +1444,20 @@ def _process_learner( text_key_values=False, model=output_model + str(num_transforms + 2)) label_node._implicit = True - feature_node = transforms_featurecombiner( data="$label_data", features=features, output_data=output_data, - model=output_model + str( - num_transforms + 3)) + model=output_model + str(num_transforms + 3)) feature_node._implicit = True implicit_nodes = [optional_node, label_node, feature_node] elif learner.type in {'recommender', 'sequence'}: - raise NotImplementedError( - "Type '{0}' is not implemented yet.".format( - learner.type)) + raise NotImplementedError("Type '{0}' is not implemented yet.". + format(learner.type)) else: feature_node = transforms_featurecombiner( data="$input_data" if num_transforms == 0 else - output_data + - str( - num_transforms), + output_data + str(num_transforms), features=features, output_data=output_data, model=output_model + str(num_transforms + 1)) @@ -1713,20 +1743,20 @@ def get_feature_contributions(self, X, top=10, bottom=10, verbose=0, to report. :param bottom: The number of negative contributions with highest magnitude to report. - :return: dataframe of containing the raw data, predicted label, score, + :return: dataframe containing the raw data, predicted label, score, probabilities, and feature contributions. """ self.verbose = verbose if not self._is_fitted: raise ValueError( - "Model is not fitted. Train or load a model before test().") + "Model is not fitted. 
Train or load a model before.") if len(self.steps) > 0: last_node = self.last_node if last_node.type == 'transform': raise ValueError( - "Pipeline needs a trainer as last step for test()") + "Pipeline needs a trainer as last step.") X, y_temp, columns_renamed, feature_columns, label_column, \ schema, weights, weight_column = self._preprocess_X_y(X) @@ -1742,7 +1772,7 @@ def get_feature_contributions(self, X, top=10, bottom=10, verbose=0, all_nodes = [importtext_node] inputs = dict([('file', ''), ('predictor_model', self.model)]) - score_node = transforms_datasetscorer( + score_node = transforms_datasetscorerex( data="$data", predictor_model="$predictor_model", scored_data="$scoredvectordata") @@ -1759,10 +1789,203 @@ def get_feature_contributions(self, X, top=10, bottom=10, verbose=0, outputs = dict(output_data="") + data_output_format = DataOutputFormat.IDV if as_binary_data_stream \ + else DataOutputFormat.DF, + + graph = Graph( + inputs, + outputs, + data_output_format, + *all_nodes) + + class_name = type(self).__name__ + method_name = inspect.currentframe().f_code.co_name + telemetry_info = ".".join([class_name, method_name]) + + try: + (out_model, out_data, out_metrics, _) = graph.run( + X=X, + random_state=self.random_state, + model=self.model, + verbose=verbose, + telemetry_info=telemetry_info, + **params) + except RuntimeError as e: + raise e + + return out_data + + def get_output_columns(self, verbose=0, **params): + """ + Returns the output list of columns for the fitted model. + :return: list . + """ + self.verbose = verbose + + if not self._is_fitted: + raise ValueError( + "Model is not fitted. Train or load a model before.") + + if len(self.steps) > 0: + last_node = self.last_node + if last_node.type != 'transform': + raise ValueError( + "Pipeline needs a transformer as last step.") + + inputs = dict([('transform_model', self.model)]) + schema_node = models_schema( + transform_model="$transform_model", + schema="$output_data") + all_nodes = [schema_node] + + outputs = dict(output_data="") + + graph = Graph( + inputs, + outputs, + DataOutputFormat.LIST, + *all_nodes) + + try: + (_, out_data, _, _) = graph.run( + X=None, + y=None, + random_state=self.random_state, + model=self.model, + no_input_data=True, + verbose=verbose, + **params) + except RuntimeError as e: + raise e + + return out_data + + @trace + def permutation_feature_importance(self, X, number_of_examples=None, + permutation_count=1, + filter_zero_weight_features=False, + verbose=0, as_binary_data_stream=False, + **params): + """ + Permutation feature importance (PFI) is a technique to determine the + global importance of features in a trained machine learning model. PFI + is a simple yet powerful technique motivated by Breiman in section 10 + of his Random Forests paper (Machine Learning, 2001). The advantage of + the PFI method is that it is model agnostic - it works with any model + that can be evaluated - and it can use any dataset, not just the + training set, to compute feature importance metrics. + + PFI works by taking a labeled dataset, choosing a feature, and + permuting the values for that feature across all the examples, so that + each example now has a random value for the feature and the original + values for all other features. The evaluation metric (e.g. NDCG) is + then calculated for this modified dataset, and the change in the + evaluation metric from the original dataset is computed. The larger the + change in the evaluation metric, the more important the feature is to + the model, i.e. 
the most important features are those that the model is + most sensitive to. PFI works by performing this permutation analysis + across all the features of a model, one after another. + + Note that for increasing metrics (e.g. AUC, accuracy, R-Squared, NDCG), + the most important features will be those with the highest negative + mean change in the metric. Conversely, for decreasing metrics (e.g. + Mean Squared Error, Log loss), the most important features will be + those with the highest positive mean change in the metric. + + PFI is supported for binary classifiers, classifiers, regressors, and + rankers. + + The mean changes and standard errors of the means are evaluated for + the following metrics: + + * Binary Classification: + + * Area under ROC curve (AUC) + * Accuracy + * Positive precision + * Positive recall + * Negative precision + * Negative recall + * F1 score + * Area under Precision-Recall curve (AUPRC) + + * Multiclass classification: + + * Macro accuracy + * Micro accuracy + * Log loss + * Log loss reduction + * Top k accuracy + * Per-class log loss + + * Regression: + + * Mean absolute error (MAE) + * Mean squared error (MSE) + * Root mean squared error (RMSE) + * Loss function + * R-Squared + + * Ranking: + + * Discounted cumulative gains (DCG) @1, @2, and @3 + * Normalized discounted cumulative gains (NDCG) @1, @2, and @3 + + **Reference** + + `Breiman, L. Random Forests. Machine Learning (2001) 45: 5. + `_ + + :param X: {array-like [n_samples, n_features], + :py:class:`nimbusml.FileDataStream` } + :param number_of_examples: Limit the number of examples to evaluate on. + ``'None'`` means all examples in the dataset are used. + :param permutation_count: The number of permutations to perform. + :param filter_zero_weight_features: Pre-filter features with zero weight. PFI + will not be evaluated on these features. + :return: dataframe containing the mean change in evaluation metrics and + standard error of the mean for each feature. Features with the + largest change in a metric are the most important in the model with + respect to that metric. + """ + self.verbose = verbose + + if not self._is_fitted: + raise ValueError( + "Model is not fitted. 
Train or load a model before test().") + + X, _, _, _, _, schema, _, _ = self._preprocess_X_y(X) + + all_nodes = [] + inputs = dict([('data', ''), ('predictor_model', self.model)]) + if isinstance(X, FileDataStream): + importtext_node = data_customtextloader( + input_file="$file", + data="$data", + custom_schema=schema.to_string( + add_sep=True)) + all_nodes = [importtext_node] + inputs = dict([('file', ''), ('predictor_model', self.model)]) + + pfi_node = transforms_permutationfeatureimportance( + data="$data", + predictor_model="$predictor_model", + metrics="$output_data", + permutation_count=permutation_count, + number_of_examples_to_use=number_of_examples, + use_feature_weight_filter=filter_zero_weight_features) + + all_nodes.extend([pfi_node]) + + outputs = dict(output_data="") + + data_output_format = DataOutputFormat.IDV if as_binary_data_stream \ + else DataOutputFormat.DF, + graph = Graph( inputs, outputs, - as_binary_data_stream, + data_output_format, *all_nodes) class_name = type(self).__name__ @@ -1770,7 +1993,7 @@ def get_feature_contributions(self, X, top=10, bottom=10, verbose=0, telemetry_info = ".".join([class_name, method_name]) try: - (out_model, out_data, out_metrics) = graph.run( + (out_model, out_data, out_metrics, _) = graph.run( X=X, random_state=self.random_state, model=self.model, @@ -1780,8 +2003,38 @@ def get_feature_contributions(self, X, top=10, bottom=10, verbose=0, except RuntimeError as e: raise e + out_data = self._fix_pfi_columns(out_data) + return out_data + def _fix_pfi_columns(self, data): + cols = [] + for i in range(len(data.columns)): + if 'StdErr' in data.columns.values[i]: + if data.columns.values[i][:15] == 'PerClassLogLoss' : + cols.append('PerClassLogLoss' + \ + data.columns.values[i][21:] + '.StdErr') + elif data.columns.values[i][:10] == 'Discounted': + pos = int(data.columns.values[i][-1]) + 1 + cols.append('DCG@' + str(pos) + '.StdErr') + elif data.columns.values[i][:10] == 'Normalized': + pos = int(data.columns.values[i][-1]) + 1 + cols.append('NDCG@' + str(pos) + '.StdErr') + else: + cols.append(data.columns.values[i][:-6] + '.StdErr') + else: + if data.columns.values[i][:10] == 'Discounted': + pos = int(data.columns.values[i][26]) + 1 + cols.append('DCG@' + str(pos)) + elif data.columns.values[i][:10] == 'Normalized': + pos = int(data.columns.values[i][36]) + 1 + cols.append('NDCG@' + str(pos)) + else: + cols.append(data.columns.values[i]) + data.columns = cols + + return data + @trace def _predict(self, X, y=None, evaltype='auto', group_id=None, @@ -1816,22 +2069,44 @@ def _predict(self, X, y=None, isinstance(X, DataFrame) and isinstance(y, (str, tuple))): y = y_temp + is_transformer_chain = False + with ZipFile(self.model) as model_zip: + is_transformer_chain = any('TransformerChain' in item + for item in model_zip.namelist()) + all_nodes = [] - inputs = dict([('data', ''), ('predictor_model', self.model)]) - if isinstance(X, FileDataStream): - importtext_node = data_customtextloader( - input_file="$file", + if is_transformer_chain: + inputs = dict([('data', ''), ('transform_model', self.model)]) + if isinstance(X, FileDataStream): + importtext_node = data_customtextloader( + input_file="$file", + data="$data", + custom_schema=schema.to_string( + add_sep=True)) + all_nodes = [importtext_node] + inputs = dict([('file', ''), ('transform_model', self.model)]) + + score_node = transforms_datasettransformscorer( data="$data", - custom_schema=schema.to_string( - add_sep=True)) - all_nodes = [importtext_node] - inputs = dict([('file', ''), 
('predictor_model', self.model)]) - - score_node = transforms_datasetscorer( - data="$data", - predictor_model="$predictor_model", - scored_data="$scoredVectorData") - all_nodes.extend([score_node]) + transform_model="$transform_model", + scored_data="$scoredVectorData") + all_nodes.extend([score_node]) + else: + inputs = dict([('data', ''), ('predictor_model', self.model)]) + if isinstance(X, FileDataStream): + importtext_node = data_customtextloader( + input_file="$file", + data="$data", + custom_schema=schema.to_string( + add_sep=True)) + all_nodes = [importtext_node] + inputs = dict([('file', ''), ('predictor_model', self.model)]) + + score_node = transforms_datasetscorerex( + data="$data", + predictor_model="$predictor_model", + scored_data="$scoredVectorData") + all_nodes.extend([score_node]) if (evaltype in ['binary', 'multiclass']) or \ (hasattr(self, 'steps') @@ -1866,10 +2141,13 @@ def _predict(self, X, y=None, else: outputs = dict(output_data="") + data_output_format = DataOutputFormat.IDV if as_binary_data_stream \ + else DataOutputFormat.DF, + graph = Graph( inputs, outputs, - as_binary_data_stream, + data_output_format, *all_nodes) class_name = type(self).__name__ @@ -1877,7 +2155,7 @@ def _predict(self, X, y=None, telemetry_info = ".".join([class_name, method_name]) try: - (out_model, out_data, out_metrics) = graph.run( + (out_model, out_data, out_metrics, _) = graph.run( X=X, y=y, random_state=self.random_state, @@ -1889,6 +2167,10 @@ def _predict(self, X, y=None, self._run_time = time.time() - start_time raise e + if is_transformer_chain: + out_data['PredictedLabel'] = out_data['PredictedLabel']*1 + + if y is not None: # We need to fix the schema for ranking metrics if evaltype == 'ranking': @@ -1900,7 +2182,7 @@ def _predict(self, X, y=None, return out_data, out_metrics def _extract_classes(self, y): - if ((len(self.steps) > 0) and + if (self.steps and (self.last_node.type in ['classifier', 'anomaly']) and (y is not None) and (not isinstance(y, (str, tuple)))): @@ -1913,7 +2195,10 @@ def _extract_classes(self, y): self._add_classes(unique_classes) def _extract_classes_from_headers(self, headers): - if hasattr(self.last_node, 'classes_'): + # Note: _classes can not be added to the Pipeline unless + # it already exists in the predictor node because the + # dtype is required to set the correct type. 
+ if self.steps and hasattr(self.last_node, 'classes_'): classes = [x.replace('Score.', '') for x in headers] classes = np.array(classes).astype(self.last_node.classes_.dtype) self._add_classes(classes) @@ -1922,7 +2207,9 @@ def _add_classes(self, classes): # Create classes_ attribute similar to scikit # Add both to pipeline and ending classifier self.classes_ = classes - self.last_node.classes_ = classes + + if self.steps: + self.last_node.classes_ = classes @trace def predict(self, X, verbose=0, as_binary_data_stream=False, **params): @@ -1947,7 +2234,7 @@ def predict_proba(self, X, verbose=0, **params): :return: array, shape = [n_samples, n_classes] """ - if hasattr(self, 'steps') and len(self.steps) > 0: + if hasattr(self, 'steps') and self.steps: last_node = self.last_node last_node._check_implements_method('predict_proba') @@ -1987,7 +2274,7 @@ def decision_function(self, X, verbose=0, **params): :return: array, shape=(n_samples,) if n_classes == 2 else ( n_samples, n_classes) """ - if hasattr(self, 'steps') and len(self.steps) > 0: + if hasattr(self, 'steps') and self.steps: last_node = self.last_node last_node._check_implements_method('decision_function') @@ -2147,7 +2434,6 @@ def test( def transform( self, X, - y=None, verbose=0, as_binary_data_stream=False, **params): @@ -2168,18 +2454,7 @@ def transform( "Model is not fitted. Train or load a model before test(" ").") - if y is not None: - if len(self.steps) > 0: - last_node = self.last_node - if last_node.type == 'transform': - raise ValueError( - "Pipeline needs a trainer as last step for test()") - - X, y_temp, columns_renamed, feature_columns, label_column, \ - schema, weights, weight_column = self._preprocess_X_y(X, y) - - if not isinstance(y, (str, tuple)): - y = y_temp + X, _, _, _, _, schema, _, _ = self._preprocess_X_y(X) all_nodes = [] @@ -2200,10 +2475,16 @@ def transform( all_nodes.extend([apply_node]) + data_output_format = DataOutputFormat.DF + if as_binary_data_stream: + data_output_format = DataOutputFormat.IDV + elif params.pop('as_csr', False): + data_output_format = DataOutputFormat.CSR + graph = Graph( inputs, dict(output_data=""), - as_binary_data_stream, + data_output_format, *all_nodes) class_name = type(self).__name__ @@ -2212,7 +2493,7 @@ def transform( max_slots = params.pop('max_slots', -1) try: - (out_model, out_data, out_metrics) = graph.run( + (out_model, out_data, out_metrics, _) = graph.run( X=X, random_state=self.random_state, model=self.model, @@ -2275,7 +2556,7 @@ def summary(self, verbose=0, **params): graph = Graph( inputs, outputs, - False, + DataOutputFormat.DF, *all_nodes) class_name = type(self).__name__ @@ -2283,7 +2564,7 @@ def summary(self, verbose=0, **params): telemetry_info = ".".join([class_name, method_name]) try: - (_, summary_data, _) = graph.run( + (_, summary_data, _, _) = graph.run( X=None, y=None, random_state=self.random_state, @@ -2296,7 +2577,6 @@ def summary(self, verbose=0, **params): self._run_time = time.time() - start_time raise e - self._validate_model_summary(summary_data) self.model_summary = summary_data # stop the clock @@ -2304,46 +2584,6 @@ def summary(self, verbose=0, **params): self._write_csv_time = graph._write_csv_time return self.model_summary - @trace - def _validate_model_summary(self, model_summary): - """ - Validates model summary has correct format - - :param model_summary: model summary dataframes - - """ - if not isinstance(model_summary, (DataFrame)): - raise TypeError( - "Unexpected type {0} for model_summary, type DataFrame " - "is expected 
".format( - type(model_summary))) - - col_names = [ - 'Bias', - 'ClassNames', - 'Coefficients', - 'PredictorName', - 'Summary', - 'VectorName' - ] - - col_name_prefixes = [ - 'Weights', - 'Gains', - 'Support vectors.', - 'VectorData' - ] - - for col in model_summary.columns: - if col in col_names: - pass - elif any([col.startswith(pre) for pre in col_name_prefixes]): - pass - else: - raise TypeError( - "Unsupported '{0}' column is in model_summary".format( - col)) - @trace def save_model(self, dst): """ @@ -2374,7 +2614,7 @@ def load_model(self, src): self.steps = [] def __getstate__(self): - odict = {'export_version': 1} + odict = {'export_version': 2} if hasattr(self, 'steps'): odict['steps'] = self.steps @@ -2386,6 +2626,13 @@ def __getstate__(self): with open(self.model, "rb") as f: odict['modelbytes'] = f.read() + if (hasattr(self, 'predictor_model') and + self.predictor_model is not None and + os.path.isfile(self.predictor_model)): + + with open(self.predictor_model, "rb") as f: + odict['predictor_model_bytes'] = f.read() + return odict def __setstate__(self, state): @@ -2393,11 +2640,18 @@ def __setstate__(self, state): self.model = None self.random_state = None - for k, v in state.items(): - if k not in {'modelbytes', 'export_version'}: - setattr(self, k, v) + if state.get('export_version', 0) == 0: + # Pickled pipelines which were created + # before export_version was added used + # the default implementation which uses + # the instances __dict__. + if 'steps' in state: + self.steps = state['steps'] + + elif state.get('export_version', 0) in {1, 2}: + if 'steps' in state: + self.steps = state['steps'] - if state.get('export_version', 0) == 1: if 'modelbytes' in state: (fd, modelfile) = tempfile.mkstemp() fl = os.fdopen(fd, "wb") @@ -2405,6 +2659,16 @@ def __setstate__(self, state): fl.close() self.model = modelfile + if 'predictor_model_bytes' in state: + (fd, modelfile) = tempfile.mkstemp() + fl = os.fdopen(fd, "wb") + fl.write(state['predictor_model_bytes']) + fl.close() + self.predictor_model = modelfile + + else: + raise ValueError('Pipeline version not supported.') + @trace def score( self, @@ -2525,7 +2789,7 @@ def combine_models(cls, *items, **params): graph = Graph( inputs, outputs, - False, + DataOutputFormat.DF, *nodes) class_name = cls.__name__ @@ -2533,7 +2797,7 @@ def combine_models(cls, *items, **params): telemetry_info = ".".join([class_name, method_name]) try: - (out_model, _, _) = graph.run( + (out_model, _, _, _) = graph.run( X=None, y=None, random_state=None, diff --git a/src/python/nimbusml/base_predictor.py b/src/python/nimbusml/base_predictor.py index f33f746c..538e7b5c 100644 --- a/src/python/nimbusml/base_predictor.py +++ b/src/python/nimbusml/base_predictor.py @@ -88,7 +88,13 @@ def _invoke_inference_method(self, method, X, **params): @trace def get_feature_contributions(self, X, **params): - return self._invoke_inference_method('get_feature_contributions', X, **params) + return self._invoke_inference_method('get_feature_contributions', + X, **params) + + @trace + def permutation_feature_importance(self, X, **params): + return self._invoke_inference_method('permutation_feature_importance', + X, **params) @trace def predict(self, X, **params): diff --git a/src/python/nimbusml/examples/LpScaler.py b/src/python/nimbusml/examples/LpScaler.py new file mode 100644 index 00000000..b77b8539 --- /dev/null +++ b/src/python/nimbusml/examples/LpScaler.py @@ -0,0 +1,47 @@ +############################################################################### +# LpScaler 
+import numpy +from nimbusml import FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.preprocessing.normalization import LpScaler + +path = get_dataset('infert').as_filepath() +data = FileDataStream.read_csv( + path, + sep=',', + numeric_dtype=numpy.float32, + collapse=True) + +print(data.head()) + +# row_num education age.age age.parity age.induced age.case age.spontaneous age.stratum age.pooled.stratum +# 1.0 0-5yrs 26.0 6.0 1.0 1.0 2.0 1.0 3.0 +# 2.0 0-5yrs 42.0 1.0 1.0 1.0 0.0 2.0 1.0 +# 3.0 0-5yrs 39.0 6.0 2.0 1.0 0.0 3.0 4.0 +# 4.0 0-5yrs 34.0 4.0 2.0 1.0 0.0 4.0 2.0 +# 5.0 6-11yrs 35.0 3.0 1.0 1.0 1.0 5.0 32.0 + +xf = LpScaler(columns={'norm': 'age'}) +features = xf.fit_transform(data) + +print_opts = { + 'index': False, + 'justify': 'left', + 'columns': [ + 'norm.age', + 'norm.parity', + 'norm.induced', + 'norm.case', + 'norm.spontaneous', + 'norm.stratum', + 'norm.pooled.stratum' + ] +} +print('LpScaler\n', features.head().to_string(**print_opts)) + +# norm.age norm.parity norm.induced norm.case norm.spontaneous norm.stratum norm.pooled.stratum +# 0.963624 0.222375 0.037062 0.037062 0.074125 0.037062 0.111187 +# 0.997740 0.023756 0.023756 0.023756 0.000000 0.047511 0.023756 +# 0.978985 0.150613 0.050204 0.025102 0.000000 0.075307 0.100409 +# 0.982725 0.115615 0.057807 0.028904 0.000000 0.115615 0.057807 +# 0.732032 0.062746 0.020915 0.020915 0.020915 0.104576 0.669286 diff --git a/src/python/nimbusml/examples/PermutationFeatureImportance.py b/src/python/nimbusml/examples/PermutationFeatureImportance.py new file mode 100644 index 00000000..44a476ba --- /dev/null +++ b/src/python/nimbusml/examples/PermutationFeatureImportance.py @@ -0,0 +1,173 @@ +############################################################################### +# Permutation Feature Importance (PFI) + +# Permutation feature importance (PFI) is a technique to determine the global +# importance of features in a trained machine learning model. PFI is a simple +# yet powerful technique motivated by Breiman in section 10 of his Random +# Forests paper (Machine Learning, 2001). The advantage of the PFI method is +# that it is model agnostic - it works with any model that can be evaluated - +# and it can use any dataset, not just the training set, to compute feature +# importance metrics. + +# PFI works by taking a labeled dataset, choosing a feature, and permuting the +# values for that feature across all the examples, so that each example now has +# a random value for the feature and the original values for all other +# features. The evaluation metric (e.g. NDCG) is then calculated for this +# modified dataset, and the change in the evaluation metric from the original +# dataset is computed. The larger the change in the evaluation metric, the more +# important the feature is to the model, i.e. the most important features are +# those that the model is most sensitive to. PFI works by performing this +# permutation analysis across allthe features of a model, one after another. + +# PFI is supported for binary classifiers, classifiers, regressors, and +# rankers. 
+ +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.ensemble import LightGbmRanker +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import LogisticRegressionBinaryClassifier, \ + FastLinearClassifier, FastLinearRegressor +from nimbusml.preprocessing import ToKey +from numpy.testing import assert_almost_equal + +# data input (as a FileDataStream) +adult_path = get_dataset('uciadult_train').as_filepath() +classification_data = FileDataStream.read_csv(adult_path) +print(classification_data.head()) +# label workclass education ... capital-loss hours-per-week +# 0 0 Private 11th ... 0 40 +# 1 0 Private HS-grad ... 0 50 +# 2 1 Local-gov Assoc-acdm ... 0 40 +# 3 1 Private Some-college ... 0 40 +# 4 0 ? Some-college ... 0 30 + +###################################### +# PFI for Binary Classification models +###################################### +# define the training pipeline with a binary classifier +binary_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + LogisticRegressionBinaryClassifier( + feature=['age', 'education'], label='label')]) + +# train the model +binary_model = binary_pipeline.fit(classification_data) + +# get permutation feature importance +binary_pfi = binary_model.permutation_feature_importance(classification_data) + +# Print PFI for each feature, ordered by most important features w.r.t. AUC. +# Since AUC is an increasing metric, the highest negative changes indicate the +# most important features. +print("============== PFI for Binary Classification Model ==============") +print(binary_pfi.sort_values('AreaUnderRocCurve').head()) +# FeatureName AreaUnderRocCurve AreaUnderRocCurve.StdErr ... +# 0 age -0.081604 0.0 ... +# 6 education.Prof-school -0.012964 0.0 ... +# 10 education.Doctorate -0.012863 0.0 ... +# 8 education.Bachelors -0.010593 0.0 ... +# 2 education.HS-grad -0.005918 0.0 ... + + +############################### +# PFI for Classification models +############################### +# define the training pipeline with a classifier +# use 1 thread and no shuffling to force determinism +multiclass_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + FastLinearClassifier(feature=['age', 'education'], label='label', + number_of_threads=1, shuffle=False)]) + +# train the model +multiclass_model = multiclass_pipeline.fit(classification_data) + +# get permutation feature importance +multiclass_pfi = multiclass_model.permutation_feature_importance(classification_data) + +# Print PFI for each feature, ordered by most important features w.r.t. Macro +# accuracy. Since Macro accuracy is an increasing metric, the highest negative +# changes indicate the most important features. +print("================== PFI for Classification Model ==================") +print(multiclass_pfi.sort_values('MacroAccuracy').head()) +# FeatureName MacroAccuracy ... MicroAccuracy ... +# 10 education.Doctorate -0.028233 ... -0.020 ... +# 0 age -0.001750 ... 0.002 ... +# 6 education.Prof-school -0.001750 ... 0.002 ... +# 9 education.Masters -0.001299 ... -0.002 ... +# 1 education.11th 0.000000 ... 0.000 ... + +########################### +# PFI for Regression models +########################### +# load input data +infert_path = get_dataset('infert').as_filepath() +regression_data = FileDataStream.read_csv(infert_path) +print(regression_data.head()) +# age case education induced parity ... row_num spontaneous ... +# 0 26 1 0-5yrs 1 6 ... 1 2 ... +# 1 42 1 0-5yrs 1 1 ... 
2 0 ... +# 2 39 1 0-5yrs 2 6 ... 3 0 ... +# 3 34 1 0-5yrs 2 4 ... 4 0 ... +# 4 35 1 6-11yrs 1 3 ... 5 1 ... + +# define the training pipeline with a regressor +# use 1 thread and no shuffling to force determinism +regression_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + FastLinearRegressor(feature=['induced', 'education'], label='age', + number_of_threads=1, shuffle=False)]) + +# train the model +regression_model = regression_pipeline.fit(regression_data) + +# get permutation feature importance +regression_pfi = regression_model.permutation_feature_importance(regression_data) + +# Print PFI for each feature, ordered by most important features w.r.t. MAE. +# Since MAE is a decreasing metric, the highest positive changes indicate the +# most important features. +print("==================== PFI for Regression Model ====================") +print(regression_pfi.sort_values('MeanAbsoluteError', ascending=False).head()) +# FeatureName MeanAbsoluteError ... RSquared RSquared.StdErr +#3 education.12+ yrs 0.393451 ... -0.146338 0.0 +#0 induced 0.085804 ... -0.026168 0.0 +#1 education.0-5yrs 0.064460 ... -0.027587 0.0 +#2 education.6-11yrs -0.000047 ... 0.000059 0.0 + +######################## +# PFI for Ranking models +######################## +# load input data +ticket_path = get_dataset('gen_tickettrain').as_filepath() +ranking_data = FileDataStream.read_csv(ticket_path) +print(ranking_data.head()) +# rank group carrier price Class dep_day nbr_stops duration +# 0 2 1 AA 240 3 1 0 12.0 +# 1 1 1 AA 300 3 0 1 15.0 +# 2 1 1 AA 360 3 0 2 18.0 +# 3 0 1 AA 540 2 0 0 12.0 +# 4 1 1 AA 600 2 0 1 15.0 + +# define the training pipeline with a ranker +ranking_pipeline = Pipeline([ + ToKey(columns=['group']), + LightGbmRanker(feature=['Class', 'dep_day', 'duration'], + label='rank', group_id='group')]) + +# train the model +ranking_model = ranking_pipeline.fit(ranking_data) + +# get permutation feature importance +ranking_pfi = ranking_model.permutation_feature_importance(ranking_data) + +# Print PFI for each feature, ordered by most important features w.r.t. DCG@1. +# Since DCG is an increasing metric, the highest negative changes indicate the +# most important features. +print("===================== PFI for Ranking Model =====================") +print(ranking_pfi.sort_values('DCG@1').head()) +# Feature DCG@1 DCG@2 DCG@3 ... NDCG@1 NDCG@2 ... +# 0 Class -4.869096 -7.030914 -5.948893 ... -0.420238 -0.407281 ... +# 2 duration -2.344379 -3.595958 -3.956632 ... -0.232143 -0.231539 ... +# 1 dep_day 0.000000 0.000000 0.000000 ... 0.000000 0.000000 ... 
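###############################################################################
# A minimal follow-on sketch, assuming the `binary_model` and
# `classification_data` objects created in the binary classification section
# above. In the sample outputs above the *.StdErr columns are 0.0 because only
# a single permutation is performed by default; passing a larger
# `permutation_count` should yield non-zero standard errors of the mean change.
# The variable name `binary_pfi_repeated` is illustrative only.
binary_pfi_repeated = binary_model.permutation_feature_importance(
    classification_data, permutation_count=5)
print(binary_pfi_repeated[['FeatureName',
                           'AreaUnderRocCurve',
                           'AreaUnderRocCurve.StdErr']]
      .sort_values('AreaUnderRocCurve').head())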
diff --git a/src/python/nimbusml/examples/PrefixColumnConcatenator.py b/src/python/nimbusml/examples/PrefixColumnConcatenator.py new file mode 100644 index 00000000..b11ddb02 --- /dev/null +++ b/src/python/nimbusml/examples/PrefixColumnConcatenator.py @@ -0,0 +1,25 @@ +############################################################################### +# PrefixColumnConcatenator +import numpy as np +import pandas as pd +from nimbusml.preprocessing.schema import PrefixColumnConcatenator + +data = pd.DataFrame( + data=dict( + PrefixA=[2.5, np.nan, 2.1, 1.0], + PrefixB=[.75, .9, .8, .76], + AnotherColumn=[np.nan, 2.5, 2.6, 2.4])) + +# transform usage +xf = PrefixColumnConcatenator(columns={'combined': 'Prefix'}) + +# fit and transform +features = xf.fit_transform(data) + +# print features +print(features.head()) +# PrefixA PrefixB AnotherColumn combined.PrefixA combined.PrefixB +#0 2.5 0.75 NaN 2.5 0.75 +#1 NaN 0.90 2.5 NaN 0.90 +#2 2.1 0.80 2.6 2.1 0.80 +#3 1.0 0.76 2.4 1.0 0.76 \ No newline at end of file diff --git a/src/python/nimbusml/examples/Schema.py b/src/python/nimbusml/examples/Schema.py new file mode 100644 index 00000000..c0b8d493 --- /dev/null +++ b/src/python/nimbusml/examples/Schema.py @@ -0,0 +1,33 @@ +############################################################################### +# Get schema from a fitted pipeline example. +import numpy as np +import pandas as pd +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.text import NGramFeaturizer +from nimbusml.feature_extraction.text.extractor import Ngram + +# data input (as a FileDataStream) +path = get_dataset("wiki_detox_train").as_filepath() + +data = FileDataStream.read_csv(path, sep='\t') +print(data.head()) +# Sentiment SentimentText +# 0 1 ==RUDE== Dude, you are rude upload that carl p... +# 1 1 == OK! == IM GOING TO VANDALIZE WILD ONES WIK... +# 2 1 Stop trolling, zapatancas, calling me a liar m... +# 3 1 ==You're cool== You seem like a really cool g... +# 4 1 ::::: Why are you threatening me? I'm not bein... + +pipe = Pipeline([ + NGramFeaturizer( + word_feature_extractor=Ngram(), + columns={ + 'features': ['SentimentText']}) +]) + +pipe.fit(data) +schema = pipe.get_output_columns() + +print(schema[0:5]) +# ['Sentiment', 'SentimentText', 'features.Char.|=|=', 'features.Char.=|=|r', 'features.Char.=|r|u'] diff --git a/src/python/nimbusml/examples/WordTokenizer.py b/src/python/nimbusml/examples/WordTokenizer.py new file mode 100644 index 00000000..028d5d7e --- /dev/null +++ b/src/python/nimbusml/examples/WordTokenizer.py @@ -0,0 +1,32 @@ +############################################################################### +# WordTokenizer + +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.preprocessing.text import WordTokenizer + +# data input (as a FileDataStream) +path = get_dataset("wiki_detox_train").as_filepath() + +data = FileDataStream.read_csv(path, sep='\t') +print(data.head()) +# Sentiment SentimentText +# 0 1 ==RUDE== Dude, you are rude upload that carl p... +# 1 1 == OK! == IM GOING TO VANDALIZE WILD ONES WIK... +# 2 1 Stop trolling, zapatancas, calling me a liar m... +# 3 1 ==You're cool== You seem like a really cool g... +# 4 1 ::::: Why are you threatening me? I'm not bein... 
+ +tokenize = WordTokenizer(char_array_term_separators=[" "]) << {'wt': 'SentimentText'} +pipeline = Pipeline([tokenize]) + +tokenize.fit(data) +y = tokenize.transform(data) + +print(y.drop(labels='SentimentText', axis=1).head()) +# Sentiment wt.000 wt.001 wt.002 wt.003 wt.004 wt.005 ... wt.366 wt.367 wt.368 wt.369 wt.370 wt.371 wt.372 +# 0 1 ==RUDE== Dude, you are rude upload ... None None None None None None None +# 1 1 == OK! == IM GOING TO ... None None None None None None None +# 2 1 Stop trolling, zapatancas, calling me a ... None None None None None None None +# 3 1 ==You're cool== You seem like a ... None None None None None None None +# 4 1 ::::: Why are you threatening me? ... None None None None None None None diff --git a/src/python/nimbusml/examples/examples_from_dataframe/LightLda_df.py b/src/python/nimbusml/examples/examples_from_dataframe/LightLda_df.py index fd4df05b..c4a35a8f 100644 --- a/src/python/nimbusml/examples/examples_from_dataframe/LightLda_df.py +++ b/src/python/nimbusml/examples/examples_from_dataframe/LightLda_df.py @@ -2,9 +2,8 @@ # LightLda: cluster topics import pandas from nimbusml import Pipeline -from nimbusml.feature_extraction.text import LightLda -from nimbusml.feature_extraction.text import NGramFeaturizer -from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram +from nimbusml.feature_extraction.text import LightLda, NGramFeaturizer +from nimbusml.feature_extraction.text.extractor import Ngram # create the data topics = pandas.DataFrame(data=dict(review=[ @@ -19,7 +18,7 @@ # there are three main topics in our data. set num_topic=3 # and see if LightLDA vectors for topics look similar -pipeline = Pipeline([NGramFeaturizer(word_feature_extractor=n_gram( +pipeline = Pipeline([NGramFeaturizer(word_feature_extractor=Ngram( ), vector_normalizer='None') << 'review', LightLda(num_topic=3)]) y = pipeline.fit_transform(topics) diff --git a/src/python/nimbusml/examples/examples_from_dataframe/LpScaler_df.py b/src/python/nimbusml/examples/examples_from_dataframe/LpScaler_df.py new file mode 100644 index 00000000..d84679ab --- /dev/null +++ b/src/python/nimbusml/examples/examples_from_dataframe/LpScaler_df.py @@ -0,0 +1,48 @@ +############################################################################### +# LpScaler +import numpy as np +import pandas as pd +from nimbusml import Pipeline +from nimbusml.preprocessing.normalization import LpScaler +from nimbusml.preprocessing.schema import ColumnConcatenator + +in_df = pd.DataFrame( + data=dict( + Sepal_Length=[2.5, 1, 2.1, 1.0], + Sepal_Width=[.75, .9, .8, .76], + Petal_Length=[0, 2.5, 2.6, 2.4], + Species=["setosa", "viginica", "setosa", 'versicolor'])) + +in_df.iloc[:, 0:3] = in_df.iloc[:, 0:3].astype(np.float32) + +concat = ColumnConcatenator() << { + 'cat': [ 'Sepal_Length', 'Sepal_Width', 'Petal_Length'] +} + +# Normalize the input values by rescaling them to unit norm (L2, L1 or LInf). +# Performs the following operation on a vector X: Y = (X - M) / D, where M is +# mean and D is either L2 norm, L1 norm or LInf norm. 
+normed = LpScaler() << {'norm': 'cat'} + +pipeline = Pipeline([concat, normed]) +out_df = pipeline.fit_transform(in_df) + +print_opts = { + 'index': False, + 'justify': 'left', + 'columns': [ + 'Sepal_Length', + 'Sepal_Width', + 'Petal_Length', + 'norm.Sepal_Length', + 'norm.Sepal_Width', + 'norm.Petal_Length' + ] +} +print('LpScaler\n', out_df.to_string(**print_opts)) + +# Sepal_Length Sepal_Width Petal_Length norm.Sepal_Length norm.Sepal_Width norm.Petal_Length +# 2.5 0.75 0.0 0.957826 0.287348 0.000000 +# 1.0 0.90 2.5 0.352235 0.317011 0.880587 +# 2.1 0.80 2.6 0.611075 0.232790 0.756569 +# 1.0 0.76 2.4 0.369167 0.280567 0.886001 diff --git a/src/python/nimbusml/examples/examples_from_dataframe/NGramFeaturizer_df.py b/src/python/nimbusml/examples/examples_from_dataframe/NGramFeaturizer_df.py index e87b8168..e6cc14d1 100644 --- a/src/python/nimbusml/examples/examples_from_dataframe/NGramFeaturizer_df.py +++ b/src/python/nimbusml/examples/examples_from_dataframe/NGramFeaturizer_df.py @@ -2,7 +2,7 @@ # Example with TextTransform and LogisticRegressionBinaryClassifier import pandas from nimbusml.feature_extraction.text import NGramFeaturizer -from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram +from nimbusml.feature_extraction.text.extractor import Ngram from nimbusml.linear_model import LogisticRegressionBinaryClassifier train_reviews = pandas.DataFrame( @@ -77,7 +77,7 @@ y = train_reviews['like'] X = train_reviews.loc[:, train_reviews.columns != 'like'] -ngram = NGramFeaturizer(word_feature_extractor=n_gram()) << 'review' +ngram = NGramFeaturizer(word_feature_extractor=Ngram()) << 'review' X = ngram.fit_transform(X) # view the transformed numerical values and column names diff --git a/src/python/nimbusml/examples/examples_from_dataframe/PrefixColumnConcatenator_df.py b/src/python/nimbusml/examples/examples_from_dataframe/PrefixColumnConcatenator_df.py new file mode 100644 index 00000000..022e014a --- /dev/null +++ b/src/python/nimbusml/examples/examples_from_dataframe/PrefixColumnConcatenator_df.py @@ -0,0 +1,31 @@ +############################################################################### +# PrefixColumnConcatenator +import numpy as np +import pandas as pd +from nimbusml import Pipeline, Role +from nimbusml.datasets import get_dataset +from nimbusml.linear_model import LogisticRegressionClassifier +from nimbusml.preprocessing.schema import PrefixColumnConcatenator +from nimbusml.preprocessing.schema import ColumnDropper +from sklearn.model_selection import train_test_split + +# use 'iris' data set to create test and train data +# Sepal_Length Sepal_Width Petal_Length Petal_Width Label Species Setosa +# 0 5.1 3.5 1.4 0.2 0 setosa 1.0 +# 1 4.9 3.0 1.4 0.2 0 setosa 1.0 +df = get_dataset("iris").as_df() + +X_train, X_test, y_train, y_test = \ + train_test_split(df.loc[:, df.columns != 'Label'], df['Label']) + +concat = PrefixColumnConcatenator() << {'Sepal': 'Sepal_'} +concat1 = PrefixColumnConcatenator() << {'Petal': 'Petal_'} +dropcols = ColumnDropper() << ['Sepal_Length', 'Sepal_Width', 'Petal_Length', + 'Petal_Width', 'Setosa', 'Species'] + +pipeline = Pipeline([concat, concat1, dropcols, LogisticRegressionClassifier()]) +pipeline.fit(X_train, y_train) + +# Evaluate the model +metrics, scores = pipeline.test(X_test, y_test, output_scores=True) +print(metrics) diff --git a/src/python/nimbusml/examples/examples_from_dataframe/WordTokenizer_df.py b/src/python/nimbusml/examples/examples_from_dataframe/WordTokenizer_df.py new file mode 100644 index 00000000..31980567 
--- /dev/null +++ b/src/python/nimbusml/examples/examples_from_dataframe/WordTokenizer_df.py @@ -0,0 +1,33 @@ +############################################################################### +# WordTokenizer + +import pandas +from nimbusml import Pipeline +from nimbusml.preprocessing.text import WordTokenizer + +# create the data +customer_reviews = pandas.DataFrame(data=dict(review=[ + "I really did not like the taste of it", + "It was surprisingly quite good!", + "I will never ever ever go to that place again!!", + "The best ever!! It was amazingly good and super fast", + "I wish I had gone earlier, it was that great", + "somewhat dissapointing. I'd probably wont try again", + "Never visit again... rascals!"])) + +tokenize = WordTokenizer(char_array_term_separators=[" ", "n"]) << 'review' + +pipeline = Pipeline([tokenize]) + +tokenize.fit(customer_reviews) +y = tokenize.transform(customer_reviews) + +print(y) +# review.00 review.01 review.02 review.03 review.04 review.05 review.06 review.07 review.08 review.09 review.10 review.11 +# 0 I really did ot like the taste of it None None None +# 1 It was surprisi gly quite good! None None None None None None +# 2 I will ever ever ever go to that place agai !! None +# 3 The best ever!! It was amazi gly good a d super fast +# 4 I wish I had go e earlier, it was that great None +# 5 somewhat dissapoi ti g. I'd probably wo t try agai None None +# 6 Never visit agai ... rascals! None None None None None None None diff --git a/src/python/nimbusml/internal/core/linear_model/_linearsvmbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/_linearsvmbinaryclassifier.py index 0109ba44..34a18740 100644 --- a/src/python/nimbusml/internal/core/linear_model/_linearsvmbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/_linearsvmbinaryclassifier.py @@ -69,7 +69,9 @@ class LinearSvmBinaryClassifier( :param caching: Whether trainer should cache input training data. - :param lambda_: Regularizer constant. + :param l2_regularization: L2 regularization weight. It also controls the + learning rate, with the learning rate being inversely proportional to + it. :param perform_projection: Perform projection to unit-ball? Typically used with batch size > 1. @@ -105,7 +107,7 @@ def __init__( self, normalize='Auto', caching='Auto', - lambda_=0.001, + l2_regularization=0.001, perform_projection=False, number_of_iterations=1, initial_weights_diameter=0.0, @@ -119,7 +121,7 @@ def __init__( self.normalize = normalize self.caching = caching - self.lambda_ = lambda_ + self.l2_regularization = l2_regularization self.perform_projection = perform_projection self.number_of_iterations = number_of_iterations self.initial_weights_diameter = initial_weights_diameter @@ -146,7 +148,7 @@ def _get_node(self, **all_args): all_args), normalize_features=self.normalize, caching=self.caching, - lambda_=self.lambda_, + lambda_=self.l2_regularization, perform_projection=self.perform_projection, number_of_iterations=self.number_of_iterations, initial_weights_diameter=self.initial_weights_diameter, diff --git a/src/python/nimbusml/internal/core/preprocessing/_datasettransformer.py b/src/python/nimbusml/internal/core/preprocessing/_datasettransformer.py new file mode 100644 index 00000000..545e6e36 --- /dev/null +++ b/src/python/nimbusml/internal/core/preprocessing/_datasettransformer.py @@ -0,0 +1,49 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +DatasetTransformer +""" + +__all__ = ["DatasetTransformer"] + + +from ...entrypoints.models_datasettransformer import models_datasettransformer +from ...utils.utils import trace +from ..base_pipeline_item import BasePipelineItem, DefaultSignature + + +class DatasetTransformer(BasePipelineItem, DefaultSignature): + """ + **Description** + Applies a TransformModel to a dataset. + + :param transform_model: Transform model. + + :param params: Additional arguments sent to compute engine. + + """ + + @trace + def __init__( + self, + transform_model, + **params): + BasePipelineItem.__init__( + self, type='transform', **params) + + self.transform_model = transform_model + + @property + def _entrypoint(self): + return models_datasettransformer + + @trace + def _get_node(self, **all_args): + algo_args = dict( + transform_model=self.transform_model) + + all_args.update(algo_args) + return self._entrypoint(**all_args) diff --git a/src/python/nimbusml/internal/core/preprocessing/normalization/_lpscaler.py b/src/python/nimbusml/internal/core/preprocessing/normalization/_lpscaler.py new file mode 100644 index 00000000..3dce5d56 --- /dev/null +++ b/src/python/nimbusml/internal/core/preprocessing/normalization/_lpscaler.py @@ -0,0 +1,93 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +LpScaler +""" + +__all__ = ["LpScaler"] + + +from ....entrypoints.transforms_lpnormalizer import transforms_lpnormalizer +from ....utils.utils import trace +from ...base_pipeline_item import BasePipelineItem, DefaultSignature + + +class LpScaler(BasePipelineItem, DefaultSignature): + """ + **Description** + Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. + + :param norm: The norm to use to normalize each sample. + + :param sub_mean: Subtract mean from each value before normalizing. + + :param params: Additional arguments sent to compute engine. 
+ + """ + + @trace + def __init__( + self, + norm='L2', + sub_mean=False, + **params): + BasePipelineItem.__init__( + self, type='transform', **params) + + self.norm = norm + self.sub_mean = sub_mean + + @property + def _entrypoint(self): + return transforms_lpnormalizer + + @trace + def _get_node(self, **all_args): + + input_columns = self.input + if input_columns is None and 'input' in all_args: + input_columns = all_args['input'] + if 'input' in all_args: + all_args.pop('input') + + output_columns = self.output + if output_columns is None and 'output' in all_args: + output_columns = all_args['output'] + if 'output' in all_args: + all_args.pop('output') + + # validate input + if input_columns is None: + raise ValueError( + "'None' input passed when it cannot be none.") + + if not isinstance(input_columns, list): + raise ValueError( + "input has to be a list of strings, instead got %s" % + type(input_columns)) + + # validate output + if output_columns is None: + output_columns = input_columns + + if not isinstance(output_columns, list): + raise ValueError( + "output has to be a list of strings, instead got %s" % + type(output_columns)) + + algo_args = dict( + column=[ + dict( + Source=i, + Name=o) for i, + o in zip( + input_columns, + output_columns)] if input_columns else None, + norm=self.norm, + sub_mean=self.sub_mean) + + all_args.update(algo_args) + return self._entrypoint(**all_args) diff --git a/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py b/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py new file mode 100644 index 00000000..d202e947 --- /dev/null +++ b/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py @@ -0,0 +1,100 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +""" +PrefixColumnConcatenator +""" + +__all__ = ["PrefixColumnConcatenator"] + + +from ....entrypoints.transforms_prefixcolumnconcatenator import \ + transforms_prefixcolumnconcatenator +from ....utils.utils import trace +from ...base_pipeline_item import BasePipelineItem, DefaultSignature + + +class PrefixColumnConcatenator(BasePipelineItem, DefaultSignature): + """ + + Combines several columns into a single vector-valued column by prefix + + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. + + :param params: Additional arguments sent to compute engine. + + .. seealso:: + :py:class:`ColumnDropper + `, + :py:class:`ColumnSelector + `. + + .. index:: transform, schema + + Example: + .. 
literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py + :language: python + """ + + @trace + def __init__( + self, + **params): + BasePipelineItem.__init__( + self, type='transform', **params) + + @property + def _entrypoint(self): + return transforms_prefixcolumnconcatenator + + @trace + def _get_node(self, **all_args): + + input_columns = self.input + if input_columns is None and 'input' in all_args: + input_columns = all_args['input'] + if 'input' in all_args: + all_args.pop('input') + + output_columns = self.output + if output_columns is None and 'output' in all_args: + output_columns = all_args['output'] + if 'output' in all_args: + all_args.pop('output') + + # validate input + if input_columns is None: + raise ValueError( + "'None' input passed when it cannot be none.") + + if not isinstance(input_columns, list): + raise ValueError( + "input has to be a list of strings, instead got %s" % + type(input_columns)) + + # validate output + if output_columns is None: + raise ValueError( + "'None' output passed when it cannot be none.") + + if not isinstance(output_columns, list): + raise ValueError( + "output has to be a list of strings, instead got %s" % + type(output_columns)) + + algo_args = dict( + column=[ + dict( + Source=i, Name=o) for i, o in zip( + input_columns, output_columns)] if input_columns else None) + + all_args.update(algo_args) + return self._entrypoint(**all_args) diff --git a/src/python/nimbusml/internal/core/preprocessing/text/_wordtokenizer.py b/src/python/nimbusml/internal/core/preprocessing/text/_wordtokenizer.py new file mode 100644 index 00000000..66e06176 --- /dev/null +++ b/src/python/nimbusml/internal/core/preprocessing/text/_wordtokenizer.py @@ -0,0 +1,89 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +WordTokenizer +""" + +__all__ = ["WordTokenizer"] + + +from ....entrypoints.transforms_wordtokenizer import transforms_wordtokenizer +from ....utils.utils import trace +from ...base_pipeline_item import BasePipelineItem, DefaultSignature + + +class WordTokenizer(BasePipelineItem, DefaultSignature): + """ + **Description** + The input to this transform is text, and the output is a vector of text containing the words (tokens) in the original text. The separator is space, but can be specified as any other character (or multiple characters) if needed. + + :param char_array_term_separators: Array of single character term + separator(s). By default uses space character separator. + + :param params: Additional arguments sent to compute engine. 
+ + """ + + @trace + def __init__( + self, + char_array_term_separators=None, + **params): + BasePipelineItem.__init__( + self, type='transform', **params) + + self.char_array_term_separators = char_array_term_separators + + @property + def _entrypoint(self): + return transforms_wordtokenizer + + @trace + def _get_node(self, **all_args): + + input_columns = self.input + if input_columns is None and 'input' in all_args: + input_columns = all_args['input'] + if 'input' in all_args: + all_args.pop('input') + + output_columns = self.output + if output_columns is None and 'output' in all_args: + output_columns = all_args['output'] + if 'output' in all_args: + all_args.pop('output') + + # validate input + if input_columns is None: + raise ValueError( + "'None' input passed when it cannot be none.") + + if not isinstance(input_columns, list): + raise ValueError( + "input has to be a list of strings, instead got %s" % + type(input_columns)) + + # validate output + if output_columns is None: + output_columns = input_columns + + if not isinstance(output_columns, list): + raise ValueError( + "output has to be a list of strings, instead got %s" % + type(output_columns)) + + algo_args = dict( + column=[ + dict( + Source=i, + Name=o) for i, + o in zip( + input_columns, + output_columns)] if input_columns else None, + char_array_term_separators=self.char_array_term_separators) + + all_args.update(algo_args) + return self._entrypoint(**all_args) diff --git a/src/python/nimbusml/internal/entrypoints/models_schema.py b/src/python/nimbusml/internal/entrypoints/models_schema.py new file mode 100644 index 00000000..0b8b0056 --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/models_schema.py @@ -0,0 +1,47 @@ +""" +Models.Summarizer +""" + + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def models_schema( + transform_model, + schema=None, + **params): + """ + **Description** + Retreives input/output column schema for transform model. + + :param transform_model: The transform model. + """ + + entrypoint_name = 'Models.Schema' + inputs = {} + outputs = {} + + if transform_model is not None: + inputs['Model'] = try_set( + obj=transform_model, + none_acceptable=False, + is_of_type=str) + if schema is not None: + outputs['Schema'] = try_set( + obj=schema, + none_acceptable=False, + is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py b/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py new file mode 100644 index 00000000..7a5d8c71 --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py @@ -0,0 +1,68 @@ +""" +Transforms.DatasetScorerEx +""" + + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def transforms_datasetscorerex( + data, + predictor_model, + scored_data=None, + scoring_transform=None, + suffix=None, + **params): + """ + **Description** + Score a dataset with a predictor model + + :param data: The dataset to be scored (inputs). + :param predictor_model: The predictor model to apply to data + (inputs). 
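The entrypoint helpers added in this diff share one pattern: string inputs and outputs that start with `$` are collected as graph variables. A small sketch of building the new `Models.Schema` node defined above, using placeholder variable names:

```python
# Sketch: construct a Models.Schema graph node. '$transform_model' and
# '$schema' are placeholder graph-variable names, not values from the diff.
from nimbusml.internal.entrypoints.models_schema import models_schema

node = models_schema(transform_model='$transform_model', schema='$schema')
# Both strings start with '$', so they are picked up as the node's input
# and output variables when the graph is assembled.
```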
+ :param suffix: Suffix to append to the score columns (inputs). + :param scored_data: The scored dataset (outputs). + :param scoring_transform: The scoring transform (outputs). + """ + + entrypoint_name = 'Transforms.DatasetScorerEx' + inputs = {} + outputs = {} + + if data is not None: + inputs['Data'] = try_set( + obj=data, + none_acceptable=False, + is_of_type=str) + if predictor_model is not None: + inputs['PredictorModel'] = try_set( + obj=predictor_model, + none_acceptable=False, + is_of_type=str) + if suffix is not None: + inputs['Suffix'] = try_set( + obj=suffix, + none_acceptable=True, + is_of_type=str) + if scored_data is not None: + outputs['ScoredData'] = try_set( + obj=scored_data, + none_acceptable=False, + is_of_type=str) + if scoring_transform is not None: + outputs['ScoringTransform'] = try_set( + obj=scoring_transform, none_acceptable=False, is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/transforms_featureselectorbymutualinformation.py b/src/python/nimbusml/internal/entrypoints/transforms_featureselectorbymutualinformation.py index 0663f8cd..74443348 100644 --- a/src/python/nimbusml/internal/entrypoints/transforms_featureselectorbymutualinformation.py +++ b/src/python/nimbusml/internal/entrypoints/transforms_featureselectorbymutualinformation.py @@ -55,7 +55,7 @@ def transforms_featureselectorbymutualinformation( none_acceptable=False, is_of_type=str) if label_column_name is not None: - inputs['LabelColumn'] = try_set( + inputs['LabelColumnName'] = try_set( obj=label_column_name, none_acceptable=True, is_of_type=str, diff --git a/src/python/nimbusml/internal/entrypoints/transforms_permutationfeatureimportance.py b/src/python/nimbusml/internal/entrypoints/transforms_permutationfeatureimportance.py new file mode 100644 index 00000000..18ff2e51 --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/transforms_permutationfeatureimportance.py @@ -0,0 +1,81 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +Transforms.PermutationFeatureImportance +""" + +import numbers + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def transforms_permutationfeatureimportance( + data, + predictor_model, + metrics=None, + use_feature_weight_filter=False, + number_of_examples_to_use=None, + permutation_count=1, + **params): + """ + **Description** + Permutation Feature Importance (PFI) + + :param data: Input dataset (inputs). + :param predictor_model: The path to the model file (inputs). + :param use_feature_weight_filter: Use feature weights to pre- + filter features (inputs). + :param number_of_examples_to_use: Limit the number of examples to + evaluate on (inputs). + :param permutation_count: The number of permutations to perform + (inputs). + :param metrics: The PFI metrics (outputs). 
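A condensed sketch of the public API this entrypoint backs, mirroring the `permutation_feature_importance` tests added further down in this diff (the dataset, feature, and label names come from those tests):

```python
# Sketch, condensed from test_permutation_feature_importance.py below:
# fit a binary classifier, then compute permutation feature importance.
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.categorical import OneHotVectorizer
from nimbusml.linear_model import LogisticRegressionBinaryClassifier

data = FileDataStream.read_csv(get_dataset('uciadult_train').as_filepath())
model = Pipeline([
    OneHotVectorizer(columns=['education']),
    LogisticRegressionBinaryClassifier(feature=['age', 'education'],
                                       label='label')]).fit(data)
pfi = model.permutation_feature_importance(data)
print(pfi['AreaUnderRocCurve'])
```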
+ """ + + entrypoint_name = 'Transforms.PermutationFeatureImportance' + inputs = {} + outputs = {} + + if data is not None: + inputs['Data'] = try_set( + obj=data, + none_acceptable=False, + is_of_type=str) + if predictor_model is not None: + inputs['PredictorModel'] = try_set( + obj=predictor_model, + none_acceptable=False, + is_of_type=str) + if use_feature_weight_filter is not None: + inputs['UseFeatureWeightFilter'] = try_set( + obj=use_feature_weight_filter, + none_acceptable=True, + is_of_type=bool) + if number_of_examples_to_use is not None: + inputs['NumberOfExamplesToUse'] = try_set( + obj=number_of_examples_to_use, + none_acceptable=True, + is_of_type=numbers.Real) + if permutation_count is not None: + inputs['PermutationCount'] = try_set( + obj=permutation_count, + none_acceptable=True, + is_of_type=numbers.Real) + if metrics is not None: + outputs['Metrics'] = try_set( + obj=metrics, + none_acceptable=False, + is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py b/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py new file mode 100644 index 00000000..cfe672b7 --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py @@ -0,0 +1,64 @@ +""" +Transforms.PrefixColumnConcatenator +""" + + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def transforms_prefixcolumnconcatenator( + column, + data, + output_data=None, + model=None, + **params): + """ + **Description** + Concatenates one or more columns of the same item type by prefix. + + :param column: New column definition(s) (optional form: + name:srcs) (inputs). + :param data: Input dataset (inputs). + :param output_data: Transformed dataset (outputs). + :param model: Transform model (outputs). 
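A minimal, hypothetical sketch of how the public `PrefixColumnConcatenator` wrapper (added later in this diff) that uses this entrypoint might be called; the column names and the prefix-as-source `columns` mapping are illustrative assumptions:

```python
# Hypothetical sketch (assumes the output column name maps to a source
# prefix): fold every column starting with 'Sepal_' into one vector column.
import pandas as pd
from nimbusml import Pipeline
from nimbusml.preprocessing.schema import PrefixColumnConcatenator

df = pd.DataFrame({'Sepal_Length': [1.0, 2.0],
                   'Sepal_Width': [3.0, 4.0],
                   'Label': [0, 1]})
concat = PrefixColumnConcatenator(columns={'Sepal': 'Sepal_'})
print(Pipeline([concat]).fit_transform(df))
```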
+ """ + + entrypoint_name = 'Transforms.PrefixColumnConcatenator' + inputs = {} + outputs = {} + + if column is not None: + inputs['Column'] = try_set( + obj=column, + none_acceptable=False, + is_of_type=list, + is_column=True) + if data is not None: + inputs['Data'] = try_set( + obj=data, + none_acceptable=False, + is_of_type=str) + if output_data is not None: + outputs['OutputData'] = try_set( + obj=output_data, + none_acceptable=False, + is_of_type=str) + if model is not None: + outputs['Model'] = try_set( + obj=model, + none_acceptable=False, + is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py b/src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py new file mode 100644 index 00000000..16fca0ad --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py @@ -0,0 +1,69 @@ +""" +Transforms.VariableColumnTransform +""" + + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def transforms_variablecolumn( + data, + output_data=None, + model=None, + features=None, + length_column_name=None, + **params): + """ + **Description** + Combines the specified input columns in to a + single variable length vectorized column. + + :param data: Input dataset (inputs). + :param output_data: Transformed dataset (outputs). + :param model: Transform model (outputs). 
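A brief, hypothetical sketch of constructing the `Transforms.VariableColumnTransform` node described above; the graph-variable names, the `features` list, and the length-column name are placeholders, since the diff does not document their exact semantics:

```python
# Hypothetical sketch: build a Transforms.VariableColumnTransform node with
# placeholder graph variables and column names.
from nimbusml.internal.entrypoints.transforms_variablecolumn import \
    transforms_variablecolumn

node = transforms_variablecolumn(
    data='$data',
    output_data='$output_data',
    features=['token_ids'],            # columns folded into the variable-length vector
    length_column_name='num_tokens')   # placeholder name for the optional length column
```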
+ """ + + entrypoint_name = 'Transforms.VariableColumnTransform' + inputs = {} + outputs = {} + + if data is not None: + inputs['Data'] = try_set( + obj=data, + none_acceptable=False, + is_of_type=str) + if features is not None: + inputs['Features'] = try_set( + obj=features, + none_acceptable=True, + is_of_type=list, + is_column=True) + if length_column_name is not None: + inputs['LengthColumnName'] = try_set( + obj=length_column_name, + none_acceptable=True, + is_of_type=str) + if output_data is not None: + outputs['OutputData'] = try_set( + obj=output_data, + none_acceptable=False, + is_of_type=str) + if model is not None: + outputs['Model'] = try_set( + obj=model, + none_acceptable=False, + is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/transforms_wordtokenizer.py b/src/python/nimbusml/internal/entrypoints/transforms_wordtokenizer.py new file mode 100644 index 00000000..e7fac07a --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/transforms_wordtokenizer.py @@ -0,0 +1,76 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +Transforms.WordTokenizer +""" + + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def transforms_wordtokenizer( + data, + output_data=None, + model=None, + column=None, + char_array_term_separators=None, + **params): + """ + **Description** + The input to this transform is text, and the output is a vector of + text containing the words (tokens) in the original text. The + separator is space, but can be specified as any other + character (or multiple characters) if needed. + + :param column: New column definition(s) (inputs). + :param data: Input dataset (inputs). + :param char_array_term_separators: Array of single character term + separator(s). By default uses space character separator. + (inputs). + :param output_data: Transformed dataset (outputs). + :param model: Transform model (outputs). 
+ """ + + entrypoint_name = 'Transforms.WordTokenizer' + inputs = {} + outputs = {} + + if column is not None: + inputs['Column'] = try_set( + obj=column, + none_acceptable=True, + is_of_type=list, + is_column=True) + if data is not None: + inputs['Data'] = try_set( + obj=data, + none_acceptable=False, + is_of_type=str) + if char_array_term_separators is not None: + inputs['CharArrayTermSeparators'] = try_set( + obj=char_array_term_separators, + none_acceptable=True, + is_of_type=list) + if output_data is not None: + outputs['OutputData'] = try_set( + obj=output_data, + none_acceptable=False, + is_of_type=str) + if model is not None: + outputs['Model'] = try_set( + obj=model, + none_acceptable=False, + is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/utils/data_schema.py b/src/python/nimbusml/internal/utils/data_schema.py index 0fb409e1..e1880dab 100644 --- a/src/python/nimbusml/internal/utils/data_schema.py +++ b/src/python/nimbusml/internal/utils/data_schema.py @@ -655,7 +655,7 @@ def handle_file(filename): graph = Graph(*(graph_nodes), inputs=dict(file=filename), outputs=dict(data='')) st = FileDataStream(filename, schema=None) - (out_model, out_data, out_metrics) = graph.run(verbose=True, + (out_model, out_data, out_metrics, _) = graph.run(verbose=True, X=st) if isinstance(filepath_or_buffer, StringIO): @@ -882,6 +882,21 @@ def clean_name(col): final_schema.sort() return DataSchema(final_schema, **opt) + @staticmethod + def extract_idv_schema_from_file(path): + with open(path, 'r') as f: + lines = f.readlines() + + col_regex = re.compile(r'#@\s*(col=.*)$') + col_specs = [] + + for line in lines: + match = col_regex.match(line) + if match: + col_specs.append(match.group(1)) + + return DataSchema(' '.join(col_specs)) + class COL: """ diff --git a/src/python/nimbusml/internal/utils/data_stream.py b/src/python/nimbusml/internal/utils/data_stream.py index 7d490bc6..ea544307 100644 --- a/src/python/nimbusml/internal/utils/data_stream.py +++ b/src/python/nimbusml/internal/utils/data_stream.py @@ -399,13 +399,17 @@ def __init__(self, parent, columns): class BinaryDataStream(DataStream): """ - Defines a data view. + Data accessor for IDV data format, see here https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewImplementation.md """ - def __init__(self, filename): - # REVIEW: would be good to figure out a way to know the schema of the - # binary IDV. - super(BinaryDataStream, self).__init__(DataSchema("")) + def __init__(self, filename=None): + if filename: + schema_file_path = os.path.splitext(filename)[0] + '.schema' + schema = DataSchema.extract_idv_schema_from_file(schema_file_path) + else: + schema = DataSchema("") + + super(BinaryDataStream, self).__init__(schema) self._filename = filename def __repr__(self): @@ -419,7 +423,7 @@ def to_df(self): # Do not move these imports or the module fails # due to circular references. 
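As context for the new `extract_idv_schema_from_file` helper and the `BinaryDataStream` change above: the helper scans a side-car `.schema` file and keeps only lines of the form `#@ col=...`. A hypothetical sketch, with illustrative file contents:

```python
# Hypothetical sketch: read column declarations from an IDV side-car schema
# file. Only lines matching '#@ col=...' are kept; everything else is ignored.
#
# Illustrative contents of 'example.schema':
#   #@ TextLoader{
#   #@ col=row_num:I8:0 col=education:R4:1-3 col=age:I8:4
#   #@ }
from nimbusml.internal.utils.data_schema import DataSchema

schema = DataSchema.extract_idv_schema_from_file('example.schema')
print(schema)  # expected to carry the col=... declarations shown above
```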
from ..entrypoints.transforms_nooperation import transforms_nooperation - from .entrypoints import Graph + from .entrypoints import Graph, DataOutputFormat no_op = transforms_nooperation( data='$data', output_data='$output_data') @@ -427,8 +431,8 @@ def to_df(self): graph = Graph( dict( data=''), dict( - output_data=''), False, *(graph_nodes)) - (out_model, out_data, out_metrics) = graph.run(verbose=True, X=self) + output_data=''), DataOutputFormat.DF, *(graph_nodes)) + (out_model, out_data, out_metrics, _) = graph.run(verbose=True, X=self) return out_data def head(self, n=5, skip=0): @@ -438,7 +442,7 @@ def head(self, n=5, skip=0): transforms_rowtakefilter from ..entrypoints.transforms_rowskipfilter import \ transforms_rowskipfilter - from .entrypoints import Graph + from .entrypoints import Graph, DataOutputFormat if n == 0: raise ValueError("n must be > 0") graph_nodes = [] @@ -456,10 +460,16 @@ def head(self, n=5, skip=0): graph = Graph( dict( data=''), dict( - output_data=''), False, *(graph_nodes)) - (out_model, out_data, out_metrics) = graph.run(verbose=True, X=self) + output_data=''), DataOutputFormat.DF, *(graph_nodes)) + (out_model, out_data, out_metrics, _) = graph.run(verbose=True, X=self) return out_data + def get_dataframe_schema(self): + if not hasattr(self, '_df_schema') or not self._df_schema: + head = self.head(n=1) + self._df_schema = DataSchema.read_schema(head) + return self._df_schema + def clone(self): """ Copy/clone the object. @@ -479,7 +489,7 @@ class DprepDataStream(BinaryDataStream): def __init__(self, dataflow=None, filename=None): if dataflow is None and filename is None: raise ValueError('Both dataflow object and filename are None') - super(DprepDataStream, self).__init__(DataSchema("")) + super(DprepDataStream, self).__init__() if dataflow is not None: (fd, filename) = tempfile.mkstemp(suffix='.dprep') fl = os.fdopen(fd, "wt") diff --git a/src/python/nimbusml/internal/utils/dataframes.py b/src/python/nimbusml/internal/utils/dataframes.py index fe46ac20..17572ad1 100644 --- a/src/python/nimbusml/internal/utils/dataframes.py +++ b/src/python/nimbusml/internal/utils/dataframes.py @@ -6,7 +6,7 @@ import numpy as np import six -from pandas import DataFrame, Series, concat, Categorical +from pandas import DataFrame, Series, concat, Categorical, to_datetime from pandas.api.types import infer_dtype from scipy.sparse import csr_matrix @@ -47,6 +47,13 @@ def resolve_dataframe(dataframe): # Workaround, empty dataframe needs to be sent as an array # to convey type information ret[name_i] = serie.values.reshape((len(serie), 1)) + + elif serie.dtype == np.dtype('datetime64[ns]'): + values = serie.values.astype(np.int64, copy=False) + values = values // 1000000 # convert from nanoseconds to milliseconds + ret[str(i)] = values + types.append(_global_dtype_to_char_dict[np.dtype('datetime64[ns]')]) + elif serie.dtype == np.object or str(serie.dtype) == '= 2: raise BridgeRuntimeError( - "{0}.\n--CODE--\n{1}\n--GRAPH--\n{2}\n--DATA--\n{3}" - "\n--\nconcatenated={4}".format( - str(e), code, str(self), vars, concatenated), + "{0}.\n--GRAPH--\n{1}\n--DATA--\n{2}" + "\n--\nconcatenated={3}".format( + str(e), str(self), vars, concatenated), model=output_modelfilename) else: raise BridgeRuntimeError( @@ -348,12 +319,16 @@ def _get_separator(self): return None return pieces[0].replace("sep=", "").strip() - def idv_bridge(self, X, y, code, random_state=None, verbose=1, **params): + def run(self, X, y=None, max_slots=-1, random_state=None, verbose=1, **params): + if params.get("dryrun") 
is not None: + return 'graph = %s' % (str(self)) + output_modelfilename = None + output_predictor_modelfilename = None output_metricsfilename = None out_metrics = None - # Ideally, idv_bridge shouldn't care if it's running CV + # Ideally, run_graph shouldn't care if it's running CV # or a regular pipeline. That required changing the idv_bridge to be # more flexible (e.g. changing return value, changing input # structure, etc.) In my first attempt, this approach caused @@ -421,15 +396,23 @@ def remove_multi_level_index(c): output_modelfilename = _get_temp_file(suffix='.model.bin') self.outputs['output_model'] = output_modelfilename + # set graph output model to temp file + if 'output_predictor_model' in self.outputs: + output_predictor_modelfilename = _get_temp_file(suffix='.predictor.model.bin') + self.outputs['output_predictor_model'] = output_predictor_modelfilename + # set graph output metrics to temp file if 'output_metrics' in self.outputs: output_metricsfilename = _get_temp_file(suffix='.txt') self.outputs['output_metrics'] = output_metricsfilename - if 'output_data' in self.outputs and \ - self._output_binary_data_stream: - output_idvfilename = _get_temp_file(suffix='.idv') - self.outputs['output_data'] = output_idvfilename + if 'output_data' in self.outputs: + if self._data_output_format == DataOutputFormat.IDV: + output_idvfilename = _get_temp_file(suffix='.idv') + self.outputs['output_data'] = output_idvfilename + + elif self._data_output_format == DataOutputFormat.CSR: + self.outputs['output_data'] = "" # set graph file for debuggings if verbose > 0: @@ -442,9 +425,7 @@ def remove_multi_level_index(c): f.write(self.nimbusml_runnable_graph) call_parameters['verbose'] = try_set(verbose, False, six.integer_types) - call_parameters['graph'] = try_set( - 'graph = {%s} %s' % - (str(self), code), False, str) + call_parameters['graph'] = try_set(str(self), False, str) # Set paths to .NET Core CLR, ML.NET and DataPrep libs set_clr_environment_vars() @@ -455,23 +436,35 @@ def remove_multi_level_index(c): if random_state: call_parameters['seed'] = try_set(random_state, False, six.integer_types) + + if max_slots: + call_parameters['max_slots'] = try_set(max_slots, False, six.integer_types) + ret = self._try_call_bridge( px_call, call_parameters, - code, verbose, concatenated, - output_modelfilename) - - out_data = resolve_output(ret) - # remove label column from data - if out_data is not None and concatenated: - out_columns = list(out_data.columns) - if hasattr(y, 'columns'): - y_column = y.columns[0] - if y_column in out_columns: - out_columns.remove(y_column) - out_data = out_data[out_columns] + output_modelfilename, + output_predictor_modelfilename) + + out_data = None + + if not cv and self._data_output_format == DataOutputFormat.CSR: + out_data = resolve_output_as_csrmatrix(ret) + elif not cv and self._data_output_format == DataOutputFormat.LIST: + out_data = resolve_output_as_list(ret) + else: + out_data = resolve_output_as_dataframe(ret) + # remove label column from data + if out_data is not None and concatenated: + out_columns = list(out_data.columns) + if hasattr(y, 'columns'): + y_column = y.columns[0] + if y_column in out_columns: + out_columns.remove(y_column) + out_data = out_data[out_columns] + if output_metricsfilename: out_metrics = pd.read_csv( output_metricsfilename, @@ -484,18 +477,15 @@ def remove_multi_level_index(c): if cv: return self._process_graph_run_results(out_data) - elif self._output_binary_data_stream: + elif self._data_output_format == DataOutputFormat.IDV: 
output = BinaryDataStream(output_idvfilename) - return (output_modelfilename, output, out_metrics) + return (output_modelfilename, output, out_metrics, output_predictor_modelfilename) else: - return (output_modelfilename, out_data, out_metrics) + return (output_modelfilename, out_data, out_metrics, output_predictor_modelfilename) finally: if cv: self._remove_temp_files() else: - if output_modelfilename: - # os.remove(output_modelfilename) - pass if output_metricsfilename: os.remove(output_metricsfilename) diff --git a/src/python/nimbusml/linear_model/_linearsvmbinaryclassifier.py b/src/python/nimbusml/linear_model/_linearsvmbinaryclassifier.py index 4f35d8c5..783a6ad5 100644 --- a/src/python/nimbusml/linear_model/_linearsvmbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/_linearsvmbinaryclassifier.py @@ -78,7 +78,9 @@ class LinearSvmBinaryClassifier( :param caching: Whether trainer should cache input training data. - :param lambda_: Regularizer constant. + :param l2_regularization: L2 regularization weight. It also controls the + learning rate, with the learning rate being inversely proportional to + it. :param perform_projection: Perform projection to unit-ball? Typically used with batch size > 1. @@ -114,7 +116,7 @@ def __init__( self, normalize='Auto', caching='Auto', - lambda_=0.001, + l2_regularization=0.001, perform_projection=False, number_of_iterations=1, initial_weights_diameter=0.0, @@ -147,7 +149,7 @@ def __init__( self, normalize=normalize, caching=caching, - lambda_=lambda_, + l2_regularization=l2_regularization, perform_projection=perform_projection, number_of_iterations=number_of_iterations, initial_weights_diameter=initial_weights_diameter, diff --git a/src/python/nimbusml/model_selection/_cv.py b/src/python/nimbusml/model_selection/_cv.py index d719e07f..79a5def4 100644 --- a/src/python/nimbusml/model_selection/_cv.py +++ b/src/python/nimbusml/model_selection/_cv.py @@ -17,7 +17,7 @@ transforms_manyheterogeneousmodelcombiner from ..internal.entrypoints.transforms_modelcombiner import \ transforms_modelcombiner -from ..internal.utils.entrypoints import Graph, GraphOutputType +from ..internal.utils.entrypoints import Graph, GraphOutputType, DataOutputFormat # Extension method for extending a list of steps, with chaining @@ -544,7 +544,7 @@ def fit( group_column=group_id) steps.add(cv_node) - graph = Graph(cv_aux_info.inputs, self.outputs, False, *steps) + graph = Graph(cv_aux_info.inputs, self.outputs, DataOutputFormat.DF, *steps) # prepare telemetry info class_name = type(self).__name__ @@ -557,7 +557,6 @@ def fit( X=X, y=y, random_state=pipeline.random_state, - seed=pipeline.random_state, w=weights, verbose=verbose, telemetry_info=telemetry_info, diff --git a/src/python/nimbusml/preprocessing/__init__.py b/src/python/nimbusml/preprocessing/__init__.py index 2af0b4b3..09a735c8 100644 --- a/src/python/nimbusml/preprocessing/__init__.py +++ b/src/python/nimbusml/preprocessing/__init__.py @@ -1,10 +1,12 @@ from ._fromkey import FromKey from ._tokey import ToKey from ._tensorflowscorer import TensorFlowScorer +from ._datasettransformer import DatasetTransformer __all__ = [ 'FromKey', 'ToKey', - 'TensorFlowScorer' + 'TensorFlowScorer', + 'DatasetTransformer' ] diff --git a/src/python/nimbusml/preprocessing/_datasettransformer.py b/src/python/nimbusml/preprocessing/_datasettransformer.py new file mode 100644 index 00000000..f3964a4b --- /dev/null +++ b/src/python/nimbusml/preprocessing/_datasettransformer.py @@ -0,0 +1,54 @@ +# 
-------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +DatasetTransformer +""" + +__all__ = ["DatasetTransformer"] + + +from sklearn.base import TransformerMixin + +from ..base_transform import BaseTransform +from ..internal.core.preprocessing._datasettransformer import \ + DatasetTransformer as core +from ..internal.utils.utils import trace + + +class DatasetTransformer(core, BaseTransform, TransformerMixin): + """ + **Description** + Applies a TransformModel to a dataset. + + :param columns: see `Columns `_. + + :param transform_model: Transform model. + + :param params: Additional arguments sent to compute engine. + + """ + + @trace + def __init__( + self, + transform_model, + columns=None, + **params): + + if columns: + params['columns'] = columns + BaseTransform.__init__(self, **params) + core.__init__( + self, + transform_model=transform_model, + **params) + self._columns = columns + + def get_params(self, deep=False): + """ + Get the parameters for this operator. + """ + return core.get_params(self) diff --git a/src/python/nimbusml/preprocessing/normalization/__init__.py b/src/python/nimbusml/preprocessing/normalization/__init__.py index 2c05bf41..a312a870 100644 --- a/src/python/nimbusml/preprocessing/normalization/__init__.py +++ b/src/python/nimbusml/preprocessing/normalization/__init__.py @@ -1,6 +1,7 @@ from ._binner import Binner from ._globalcontrastrowscaler import GlobalContrastRowScaler from ._logmeanvariancescaler import LogMeanVarianceScaler +from ._lpscaler import LpScaler from ._meanvariancescaler import MeanVarianceScaler from ._minmaxscaler import MinMaxScaler @@ -8,6 +9,7 @@ 'Binner', 'GlobalContrastRowScaler', 'LogMeanVarianceScaler', + 'LpScaler', 'MeanVarianceScaler', - 'MinMaxScaler', + 'MinMaxScaler' ] diff --git a/src/python/nimbusml/preprocessing/normalization/_lpscaler.py b/src/python/nimbusml/preprocessing/normalization/_lpscaler.py new file mode 100644 index 00000000..e9fcbb34 --- /dev/null +++ b/src/python/nimbusml/preprocessing/normalization/_lpscaler.py @@ -0,0 +1,68 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +LpScaler +""" + +__all__ = ["LpScaler"] + + +from sklearn.base import TransformerMixin + +from ...base_transform import BaseTransform +from ...internal.core.preprocessing.normalization._lpscaler import \ + LpScaler as core +from ...internal.utils.utils import trace + + +class LpScaler(core, BaseTransform, TransformerMixin): + """ + **Description** + Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. + + :param columns: see `Columns `_. + + :param norm: The norm to use to normalize each sample. + + :param sub_mean: Subtract mean from each value before normalizing. + + :param params: Additional arguments sent to compute engine. 
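To make the formula in the `LpScaler` docstring above concrete, a tiny sketch of the per-row operation in plain NumPy (not the transform itself):

```python
# Y = (X - M) / D with sub_mean=False, so M = 0 and D is the row's L2 norm.
import numpy as np

x = np.array([3.0, 4.0])
y = x / np.linalg.norm(x)   # -> array([0.6, 0.8])
```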
+ + """ + + @trace + def __init__( + self, + norm='L2', + sub_mean=False, + columns=None, + **params): + + if columns: + params['columns'] = columns + BaseTransform.__init__(self, **params) + core.__init__( + self, + norm=norm, + sub_mean=sub_mean, + **params) + self._columns = columns + + def get_params(self, deep=False): + """ + Get the parameters for this operator. + """ + return core.get_params(self) + + def _nodes_with_presteps(self): + """ + Inserts preprocessing before this one. + """ + from ..schema import TypeConverter + return [ + TypeConverter( + result_type='R4')._steal_io(self), + self] diff --git a/src/python/nimbusml/preprocessing/schema/__init__.py b/src/python/nimbusml/preprocessing/schema/__init__.py index a8dae9f8..c28d8ee4 100644 --- a/src/python/nimbusml/preprocessing/schema/__init__.py +++ b/src/python/nimbusml/preprocessing/schema/__init__.py @@ -2,6 +2,7 @@ from ._columndropper import ColumnDropper from ._columnduplicator import ColumnDuplicator from ._columnselector import ColumnSelector +from ._prefixcolumnconcatenator import PrefixColumnConcatenator from ._typeconverter import TypeConverter __all__ = [ @@ -9,6 +10,7 @@ 'ColumnDropper', 'ColumnDuplicator', 'ColumnSelector', + 'PrefixColumnConcatenator', 'TypeConverter' ] diff --git a/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py b/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py new file mode 100644 index 00000000..53eccf1f --- /dev/null +++ b/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py @@ -0,0 +1,86 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +""" +PrefixColumnConcatenator +""" + +__all__ = ["PrefixColumnConcatenator"] + + +from sklearn.base import TransformerMixin + +from ...base_transform import BaseTransform +from ...internal.core.preprocessing.schema._prefixcolumnconcatenator import \ + PrefixColumnConcatenator as core +from ...internal.utils.utils import trace + + +class PrefixColumnConcatenator(core, BaseTransform, TransformerMixin): + """ + + Combines several columns into a single vector-valued column by prefix. + + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. + + :param columns: a dictionary of key-value pairs, where key is the output + column name and value is a list of input column names. + + * Only one key-value pair is allowed. + * Input column type: numeric or string. + * Output column type: + `Vector Type `_. + + The << operator can be used to set this value (see + `Column Operator `_) + + For example + * ColumnConcatenator(columns={'features': ['age', 'parity', + 'induced']}) + * ColumnConcatenator() << {'features': ['age', 'parity', + 'induced']}) + + For more details see `Columns `_. + + :param params: Additional arguments sent to compute engine. + + .. seealso:: + :py:class:`ColumnDropper + `, + :py:class:`ColumnSelector + `. + + .. index:: transform, schema + + Example: + .. 
literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py + :language: python + """ + + @trace + def __init__( + self, + columns=None, + **params): + + if columns: + params['columns'] = columns + BaseTransform.__init__(self, **params) + core.__init__( + self, + **params) + self._columns = columns + + def get_params(self, deep=False): + """ + Get the parameters for this operator. + """ + return core.get_params(self) diff --git a/src/python/nimbusml/preprocessing/text/__init__.py b/src/python/nimbusml/preprocessing/text/__init__.py index c312a30e..f40795e1 100644 --- a/src/python/nimbusml/preprocessing/text/__init__.py +++ b/src/python/nimbusml/preprocessing/text/__init__.py @@ -1,5 +1,7 @@ from ._chartokenizer import CharTokenizer +from ._wordtokenizer import WordTokenizer __all__ = [ - 'CharTokenizer' -] \ No newline at end of file + 'CharTokenizer', + 'WordTokenizer' +] diff --git a/src/python/nimbusml/preprocessing/text/_wordtokenizer.py b/src/python/nimbusml/preprocessing/text/_wordtokenizer.py new file mode 100644 index 00000000..94a1c2ac --- /dev/null +++ b/src/python/nimbusml/preprocessing/text/_wordtokenizer.py @@ -0,0 +1,55 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +WordTokenizer +""" + +__all__ = ["WordTokenizer"] + + +from sklearn.base import TransformerMixin + +from ...base_transform import BaseTransform +from ...internal.core.preprocessing.text._wordtokenizer import \ + WordTokenizer as core +from ...internal.utils.utils import trace + + +class WordTokenizer(core, BaseTransform, TransformerMixin): + """ + **Description** + The input to this transform is text, and the output is a vector of text containing the words (tokens) in the original text. The separator is space, but can be specified as any other character (or multiple characters) if needed. + + :param columns: see `Columns `_. + + :param char_array_term_separators: Array of single character term + separator(s). By default uses space character separator. + + :param params: Additional arguments sent to compute engine. + + """ + + @trace + def __init__( + self, + char_array_term_separators=None, + columns=None, + **params): + + if columns: + params['columns'] = columns + BaseTransform.__init__(self, **params) + core.__init__( + self, + char_array_term_separators=char_array_term_separators, + **params) + self._columns = columns + + def get_params(self, deep=False): + """ + Get the parameters for this operator. + """ + return core.get_params(self) diff --git a/src/python/nimbusml/tests/data_type/test_datetime.py b/src/python/nimbusml/tests/data_type/test_datetime.py new file mode 100644 index 00000000..fabab5b0 --- /dev/null +++ b/src/python/nimbusml/tests/data_type/test_datetime.py @@ -0,0 +1,140 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------------------------- +import os +import sys +import unittest +import tempfile + +import numpy as np +import pandas as pd +from nimbusml import Pipeline, DprepDataStream +from nimbusml.preprocessing.missing_values import Handler + + +def get_temp_file(suffix=None): + fd, file_name = tempfile.mkstemp(suffix=suffix) + fl = os.fdopen(fd, 'w') + fl.close() + return file_name + + +class TestDateTimeDataType(unittest.TestCase): + def test_negative_values(self): + milliseconds_in_year = 365*24*60*60*1000 + data = [i * milliseconds_in_year for i in [-1, -2, -3, -3.3]] + + df = pd.DataFrame({'c1': data, 'c2': [3,4,5,6]}) + df = df.astype({'c1': np.dtype('datetime64[ms]')}) + + pipeline = Pipeline(steps=[Handler(columns={'c2': 'c2'})]) + result = pipeline.fit_transform(df) + + self.assertTrue(result.loc[:, 'c1'].equals(df.loc[:, 'c1'])) + self.assertEqual(result.loc[:, 'c1'].dtype, np.dtype('datetime64[ns]')) + + self.assertEqual(result.loc[0, 'c1'].year, 1969) + self.assertEqual(result.loc[0, 'c1'].hour, 0) + self.assertEqual(result.loc[0, 'c1'].minute, 0) + self.assertEqual(result.loc[0, 'c1'].second, 0) + + self.assertEqual(result.loc[3, 'c1'].year, 1966) + + def test_timestamp_boundaries(self): + # Here are the current min and max for a Pandas Timestamp + # 1677-09-21 00:12:43.145225 + # 2262-04-11 23:47:16.854775807 + + data = [pd.Timestamp(1677, 9, 22, 1), pd.Timestamp.max] + df = pd.DataFrame({'c1': data, 'c2': [3,4]}) + df = df.astype({'c1': np.dtype('datetime64[ms]')}) + + pipeline = Pipeline(steps=[Handler(columns={'c2': 'c2'})]) + result = pipeline.fit_transform(df) + + self.assertTrue(result.loc[:, 'c1'].equals(df.loc[:, 'c1'])) + self.assertEqual(result.dtypes[0], np.dtype('datetime64[ns]')) + + self.assertEqual(result.loc[0, 'c1'].year, 1677) + self.assertEqual(result.loc[0, 'c1'].month, 9) + self.assertEqual(result.loc[0, 'c1'].day, 22) + + self.assertEqual(result.loc[1, 'c1'].year, 2262) + self.assertEqual(result.loc[1, 'c1'].month, 4) + self.assertEqual(result.loc[1, 'c1'].day, 11) + + def test_datetime_column_parsed_from_string(self): + dates = ["2018-01-02", "2018-02-01"] + df = pd.DataFrame({'c1': dates, 'c2': [3,4]}) + + file_name = get_temp_file('.csv') + df.to_csv(file_name) + df = pd.read_csv(file_name, parse_dates=['c1'], index_col=0) + + self.assertEqual(df.dtypes[0], np.dtype('datetime64[ns]')) + + pipeline = Pipeline(steps=[Handler(columns={'c2': 'c2'})]) + result = pipeline.fit_transform(df) + + self.assertEqual(result.loc[0, 'c1'].year, 2018) + self.assertEqual(result.loc[0, 'c1'].month, 1) + self.assertEqual(result.loc[0, 'c1'].day, 2) + self.assertEqual(result.loc[0, 'c1'].hour, 0) + self.assertEqual(result.loc[0, 'c1'].minute, 0) + self.assertEqual(result.loc[0, 'c1'].second, 0) + + self.assertEqual(result.loc[1, 'c1'].year, 2018) + self.assertEqual(result.loc[1, 'c1'].month, 2) + self.assertEqual(result.loc[1, 'c1'].day, 1) + self.assertEqual(result.loc[1, 'c1'].hour, 0) + self.assertEqual(result.loc[1, 'c1'].minute, 0) + self.assertEqual(result.loc[1, 'c1'].second, 0) + + self.assertEqual(len(result), 2) + self.assertEqual(result.dtypes[0], np.dtype('datetime64[ns]')) + + os.remove(file_name) + + @unittest.skipIf(sys.version_info[:2] == (2, 7), "azureml-dataprep is not installed.") + def test_dprep_datastream(self): + import azureml.dataprep as dprep + + dates = ["2018-01-02 00:00:00", "2018-02-01 10:00:00"] + col2 = ['0', '1'] + label_array = np.repeat([0], 2) + train_df = 
pd.DataFrame({'col1': dates, 'col2': col2, 'label': label_array}) + + pipeline = Pipeline(steps=[ + Handler(columns={'2': 'col2'}, concat=False, impute_by_slot=True, replace_with='Mean') + ]) + + file_name = get_temp_file('.csv') + train_df.to_csv(file_name) + + dataflow = dprep.read_csv(file_name, infer_column_types=True) + dprepDataStream = DprepDataStream(dataflow) + + result = pipeline.fit_transform(dprepDataStream) + + self.assertEqual(result.loc[:, 'col1'].dtype, np.dtype('datetime64[ns]')) + + self.assertEqual(result.loc[0, 'col1'].year, 2018) + self.assertEqual(result.loc[0, 'col1'].month, 1) + self.assertEqual(result.loc[0, 'col1'].day, 2) + self.assertEqual(result.loc[0, 'col1'].hour, 0) + self.assertEqual(result.loc[0, 'col1'].minute, 0) + self.assertEqual(result.loc[0, 'col1'].second, 0) + + self.assertEqual(result.loc[1, 'col1'].year, 2018) + self.assertEqual(result.loc[1, 'col1'].month, 2) + self.assertEqual(result.loc[1, 'col1'].day, 1) + self.assertEqual(result.loc[1, 'col1'].hour, 10) + self.assertEqual(result.loc[1, 'col1'].minute, 0) + self.assertEqual(result.loc[1, 'col1'].second, 0) + + os.remove(file_name) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/decomposition/test_pcaanomalydetector.py b/src/python/nimbusml/tests/decomposition/test_pcaanomalydetector.py index f3d81ea2..c5a04806 100644 --- a/src/python/nimbusml/tests/decomposition/test_pcaanomalydetector.py +++ b/src/python/nimbusml/tests/decomposition/test_pcaanomalydetector.py @@ -25,7 +25,7 @@ def test_PcaAnomalyDetector(self): scores = svm.predict(X_test) assert_almost_equal( scores.sum().sum(), - 4.181632, + 4.1786637, decimal=7, err_msg="Sum should be %s" % 4.181632) diff --git a/src/python/nimbusml/tests/dprep/test_dprep.py b/src/python/nimbusml/tests/dprep/test_dprep.py index c8ebbbdb..a2061c51 100644 --- a/src/python/nimbusml/tests/dprep/test_dprep.py +++ b/src/python/nimbusml/tests/dprep/test_dprep.py @@ -28,7 +28,7 @@ def assert_2d_array_equal(actual, desired): continue assert_true(actual[i][y] == desired[i][y]) -@unittest.skipIf(os.name == "posix" or sys.version_info[:2] != (3, 7), "azureml-dataprep is not installed.") +@unittest.skipIf(sys.version_info[:2] == (2, 7), "azureml-dataprep is not installed.") class TestDprep(unittest.TestCase): def test_fit_transform(self): diff --git a/src/python/nimbusml/tests/ensemble/test_ensembleregressor.py b/src/python/nimbusml/tests/ensemble/test_ensembleregressor.py index 5c61d9b2..a3c95495 100644 --- a/src/python/nimbusml/tests/ensemble/test_ensembleregressor.py +++ b/src/python/nimbusml/tests/ensemble/test_ensembleregressor.py @@ -33,7 +33,7 @@ def test_ensembleregressor(self): scores = ensemble.predict(X_test) r2 = r2_score(y_test, scores) - assert_greater(r2, 0.12, "should be greater than %s" % 0.12) + assert_greater(r2, 0.105, "should be greater than %s" % 0.105) assert_less(r2, 0.13, "sum should be less than %s" % 0.13) ensemble_with_options = EnsembleRegressor( @@ -46,8 +46,8 @@ def test_ensembleregressor(self): scores = ensemble_with_options.predict(X_test) r2 = r2_score(y_test, scores) - assert_greater(r2, 0.0279, "R-Squared should be greater than %s" % 0.0279) - assert_less(r2, 0.03, "R-Squared should be less than %s" % 0.03) + assert_greater(r2, 0.07, "R-Squared should be greater than %s" % 0.07) + assert_less(r2, 0.08, "R-Squared should be less than %s" % 0.08) if __name__ == '__main__': diff --git a/src/python/nimbusml/tests/idv/test_idv.py b/src/python/nimbusml/tests/idv/test_idv.py index e86f2226..c92fb092 
100644 --- a/src/python/nimbusml/tests/idv/test_idv.py +++ b/src/python/nimbusml/tests/idv/test_idv.py @@ -9,8 +9,10 @@ import pandas as pd from nimbusml import Pipeline, FileDataStream, BinaryDataStream from nimbusml.datasets import get_dataset -from nimbusml.linear_model import FastLinearRegressor +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import FastLinearRegressor, OnlineGradientDescentRegressor from nimbusml.preprocessing.normalization import MinMaxScaler +from nimbusml.preprocessing.schema import ColumnDropper from sklearn.utils.testing import assert_true, assert_array_equal # data input (as a FileDataStream) @@ -105,6 +107,113 @@ def test_test(self): assert_array_equal(scores, scores_df) assert_array_equal(metrics, metrics_df) + def test_fit_predictor_with_idv(self): + train_data = {'c0': ['a', 'b', 'a', 'b'], + 'c1': [1, 2, 3, 4], + 'c2': [2, 3, 4, 5]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + test_data = {'c0': ['a', 'b', 'b'], + 'c1': [1.5, 2.3, 3.7], + 'c2': [2.2, 4.9, 2.7]} + test_df = pd.DataFrame(test_data).astype({'c1': np.float64, + 'c2': np.float64}) + + # Fit a transform pipeline to the training data + transform_pipeline = Pipeline([OneHotVectorizer() << 'c0']) + transform_pipeline.fit(train_df) + df = transform_pipeline.transform(train_df, as_binary_data_stream=True) + + # Fit a predictor pipeline given a transformed BinaryDataStream + predictor = OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + predictor_pipeline = Pipeline([predictor]) + predictor_pipeline.fit(df) + + # Perform a prediction given the test data using + # the transform and predictor defined previously. + df = transform_pipeline.transform(test_df, as_binary_data_stream=True) + result_1 = predictor_pipeline.predict(df) + + # Create expected result + xf = OneHotVectorizer() << 'c0' + df = xf.fit_transform(train_df) + predictor = OnlineGradientDescentRegressor(label='c2', feature=['c0.a', 'c0.b', 'c1']) + predictor.fit(df) + df = xf.transform(test_df) + expected_result = predictor.predict(df) + + self.assertTrue(result_1.loc[:, 'Score'].equals(expected_result)) + + def test_fit_transform_with_idv(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path) + + featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})]) + featurization_pipeline.fit(data) + featurized_data = featurization_pipeline.transform(data, as_binary_data_stream=True) + + schema = featurized_data.schema + num_columns = len(schema) + self.assertTrue('case' in schema) + self.assertTrue('row_num' in schema) + + pipeline = Pipeline([ColumnDropper() << ['case', 'row_num']]) + pipeline.fit(featurized_data) + result = pipeline.transform(featurized_data, as_binary_data_stream=True) + + schema = result.schema + self.assertEqual(len(schema), num_columns - 2) + self.assertTrue('case' not in schema) + self.assertTrue('row_num' not in schema) + + def test_schema_with_vectorized_column(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path) + + featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})]) + featurization_pipeline.fit(data) + featurized_data = featurization_pipeline.transform(data, as_binary_data_stream=True) + + # col=row_num:I8:0 col=education:R4:1-3 col=age:I8:4 col=parity:I8:5 + # col=induced:I8:6 col=case:I8:7 col=spontaneous:I8:8 col=stratum:I8:9 + # col=pooled.stratum:I8:10 quote+ + schema 
= featurized_data.schema + + self.assertEqual(len(schema), 9) + self.assertEqual(schema['age'].Type, 'I8') + self.assertEqual(schema['age'].Name, 'age') + self.assertEqual(schema['age'].IsVector, False) + + self.assertEqual(schema['education'].Type, 'R4') + self.assertEqual(schema['education'].Name, 'education') + self.assertEqual(len(schema['education'].Pos), 3) + self.assertEqual(schema['education'].IsVector, True) + + self.assertTrue('education.0-5yrs' not in schema) + self.assertTrue('education.6-11yrs' not in schema) + self.assertTrue('education.12+yrs' not in schema) + + # col=row_num:I8:0 col=education.0-5yrs:R4:1 col=education.6-11yrs:R4:2 + # col=education.12+yrs:R4:3 col=age:I8:4 col=parity:I8:5 col=induced:I8:6 + # col=case:I8:7 col=spontaneous:I8:8 col=stratum:I8:9 col=pooled.stratum:I8:10 + # quote+ header=+ + schema = featurized_data.get_dataframe_schema() + + self.assertEqual(len(schema), 11) + self.assertEqual(schema['age'].Type, 'I8') + self.assertEqual(schema['age'].Name, 'age') + self.assertEqual(schema['age'].IsVector, False) + + self.assertTrue('education' not in schema) + self.assertTrue('education.0-5yrs' in schema) + self.assertTrue('education.6-11yrs' in schema) + self.assertTrue('education.12+yrs' in schema) + + self.assertEqual(schema['education.0-5yrs'].Type, 'R4') + self.assertEqual(schema['education.0-5yrs'].Name, 'education.0-5yrs') + self.assertEqual(schema['education.0-5yrs'].IsVector, False) + if __name__ == '__main__': unittest.main() diff --git a/src/python/nimbusml/tests/linear_model/test_averagedperceptronbinaryclassifier.py b/src/python/nimbusml/tests/linear_model/test_averagedperceptronbinaryclassifier.py index 96397f70..bcdc6530 100644 --- a/src/python/nimbusml/tests/linear_model/test_averagedperceptronbinaryclassifier.py +++ b/src/python/nimbusml/tests/linear_model/test_averagedperceptronbinaryclassifier.py @@ -37,7 +37,7 @@ def setUpClass(cls): def test_averagedperceptron(self): accuracy = get_accuracy(self, AveragedPerceptronBinaryClassifier()) # Accuracy depends on column Unnamed0 (index). 
- assert_greater(accuracy, 0.98, "accuracy should be %s" % 0.98) + assert_greater(accuracy, 0.93, "accuracy should be greater than %s" % 0.93) def test_averagedperceptron_supported_losses(self): # bug: 'exp' fails on this test diff --git a/src/python/nimbusml/tests/model_selection/test_sweep.py b/src/python/nimbusml/tests/model_selection/test_sweep.py index 5a5f0b32..4faa1993 100644 --- a/src/python/nimbusml/tests/model_selection/test_sweep.py +++ b/src/python/nimbusml/tests/model_selection/test_sweep.py @@ -18,7 +18,7 @@ from nimbusml.feature_extraction.text import NGramFeaturizer from nimbusml.feature_extraction.text import WordEmbedding from nimbusml.feature_extraction.text.extractor import Ngram -from nimbusml.linear_model import FastLinearBinaryClassifier +from nimbusml.linear_model import FastLinearBinaryClassifier, AveragedPerceptronBinaryClassifier from nimbusml.utils import get_X_y from sklearn.model_selection import GridSearchCV from sklearn.utils.testing import assert_raises @@ -68,12 +68,8 @@ def test_hyperparameters_sweep(self): 'learner__number_of_trees': 1} def test_learners_sweep(self): - # grid search over 2 learners, even though pipe defined with - # FastTreesBinaryClassifier - # FastLinearBinaryClassifier learner wins, meaning we grid searched - # over it + # grid search over 2 learners np.random.seed(0) - df = pd.DataFrame(dict(education=['A', 'A', 'A', 'A', 'B', 'A', 'B'], workclass=['X', 'Y', 'X', 'X', 'X', 'Y', 'Y'], y=[1, 0, 1, 1, 0, 1, 0])) @@ -86,17 +82,13 @@ def test_learners_sweep(self): param_grid = dict( learner=[ - FastLinearBinaryClassifier(), - FastTreesBinaryClassifier()], - learner__number_of_threads=[ - 1, - 4]) + AveragedPerceptronBinaryClassifier(), + FastTreesBinaryClassifier()]) grid = GridSearchCV(pipe, param_grid) grid.fit(X, y) assert grid.best_params_[ - 'learner'].__class__.__name__ == 'FastLinearBinaryClassifier' - assert grid.best_params_['learner__number_of_threads'] == 1 + 'learner'].__class__.__name__ == 'AveragedPerceptronBinaryClassifier' @unittest.skipIf( six.PY2, diff --git a/src/python/nimbusml/tests/model_summary/test_model_summary.py b/src/python/nimbusml/tests/model_summary/test_model_summary.py index e21d25e9..650238ae 100644 --- a/src/python/nimbusml/tests/model_summary/test_model_summary.py +++ b/src/python/nimbusml/tests/model_summary/test_model_summary.py @@ -67,25 +67,20 @@ OrdinaryLeastSquaresRegressor(), PoissonRegressionRegressor(), OneVsRestClassifier(FastLinearBinaryClassifier()), - LightGbmClassifier(), GamRegressor(), GamBinaryClassifier(), PcaAnomalyDetector(), FactorizationMachineBinaryClassifier(), KMeansPlusPlus(), - NaiveBayesClassifier() - - # Skipping these tests since they are throwing the following error: - # *** System.NotSupportedException: 'Column has variable length - # vector: CategoricalSplitFeatures. Not supported in python. 
- # Drop column before sending to Python - #FastForestBinaryClassifier(), - #FastForestRegressor(), - #FastTreesBinaryClassifier(), - #FastTreesRegressor(), - #FastTreesTweedieRegressor(), - #LightGbmRegressor(), - #LightGbmBinaryClassifier(), + NaiveBayesClassifier(), + FastForestBinaryClassifier(number_of_trees=2), + FastForestRegressor(number_of_trees=2), + FastTreesBinaryClassifier(number_of_trees=2), + FastTreesRegressor(number_of_trees=2), + FastTreesTweedieRegressor(number_of_trees=2), + LightGbmRegressor(number_of_iterations=2), + LightGbmClassifier(), + LightGbmBinaryClassifier(number_of_iterations=2) ] learners_not_supported = [ diff --git a/src/python/nimbusml/tests/pipeline/test_csr_input.py b/src/python/nimbusml/tests/pipeline/test_csr_input.py new file mode 100644 index 00000000..176a7651 --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_csr_input.py @@ -0,0 +1,65 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +import os +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import LogisticRegressionBinaryClassifier +from nimbusml.preprocessing import DatasetTransformer +from nimbusml.preprocessing.schema import PrefixColumnConcatenator +from nimbusml.preprocessing.schema import ColumnDropper +from numpy.testing import assert_equal + +class TestCsrInput(unittest.TestCase): + + def test_predict_proba_on_csr(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path) + cols = list(data.head(1).columns.values) # ordered data column names. + + # train featurizer + featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})]) + featurization_pipeline.fit(data) + # Note: the relative order of all columns is still the same as in raw data. + #print(featurization_pipeline.get_output_columns()) + + # need to remove extra columns before getting csr_matrix featurized data as it wont have column name information. + csr_featurization_pipeline = Pipeline([DatasetTransformer(featurization_pipeline.model), ColumnDropper() << ['case', 'row_num']]) + sparse_featurized_data = csr_featurization_pipeline.fit_transform(data, as_csr=True) + # Note: the relative order of all columns is still the same. 
+ #print(csr_featurization_pipeline.get_output_columns()) + + # train learner + # Note: order & number of feature columns for learner (parameter 'feature') should be the same as in csr_matrix above + cols.remove('row_num') + cols.remove('case') + feature_cols = cols + #print(feature_cols) + #['education', 'age', 'parity', 'induced', 'spontaneous', 'stratum', 'pooled.stratum'] + training_pipeline = Pipeline([DatasetTransformer(featurization_pipeline.model), LogisticRegressionBinaryClassifier(feature=feature_cols, label='case')]) + training_pipeline.fit(data, output_predictor_model=True) + + # load just a learner model + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(training_pipeline.predictor_model) + # see the order of Feature.* columns that get passed to learner algo + #print(predictor_pipeline.get_output_columns()) + + # use just a learner model on csr_matrix featurized data + predictions = predictor_pipeline.predict_proba(sparse_featurized_data) + assert_equal(len(predictions), 248) + assert_equal(len(predictions[0]), 2) + + # get feature contributions + fcc = predictor_pipeline.get_feature_contributions(sparse_featurized_data) + assert_equal(fcc.shape, (248,30)) + +if __name__ == '__main__': + unittest.main() + diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py index fc112fe5..19bc26ce 100644 --- a/src/python/nimbusml/tests/pipeline/test_load_save.py +++ b/src/python/nimbusml/tests/pipeline/test_load_save.py @@ -7,10 +7,13 @@ import pickle import unittest +import numpy as np +import pandas as pd + from nimbusml import Pipeline from nimbusml.datasets import get_dataset from nimbusml.feature_extraction.categorical import OneHotVectorizer -from nimbusml.linear_model import FastLinearBinaryClassifier +from nimbusml.linear_model import FastLinearBinaryClassifier, OnlineGradientDescentRegressor from nimbusml.utils import get_X_y from numpy.testing import assert_almost_equal @@ -326,5 +329,43 @@ def test_predictor_loaded_from_zip_has_feature_contributions(self): os.remove(model_filename) + def test_pickled_pipeline_with_predictor_model(self): + train_data = {'c1': [1, 2, 3, 4], 'c2': [2, 3, 4, 5]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + test_data = {'c1': [1.5, 2.3, 3.7], 'c2': [2.2, 4.9, 2.7]} + test_df = pd.DataFrame(test_data).astype({'c1': np.float64, + 'c2': np.float64}) + + # Create predictor model and use it to predict + pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')], random_state=0) + pipeline.fit(train_df, output_predictor_model=True) + result_1 = pipeline.predict(test_df) + + self.assertTrue(pipeline.model) + self.assertTrue(pipeline.predictor_model) + self.assertNotEqual(pipeline.model, pipeline.predictor_model) + + pickle_filename = 'nimbusml_model.p' + with open(pickle_filename, 'wb') as f: + pickle.dump(pipeline, f) + + os.remove(pipeline.model) + os.remove(pipeline.predictor_model) + + with open(pickle_filename, "rb") as f: + pipeline_pickle = pickle.load(f) + + os.remove(pickle_filename) + + # Load predictor pipeline and score data + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(pipeline_pickle.predictor_model) + result_2 = predictor_pipeline.predict(test_df) + + self.assertTrue(result_1.equals(result_2)) + + if __name__ == '__main__': unittest.main() diff --git a/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py 
b/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py new file mode 100644 index 00000000..347b2798 --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py @@ -0,0 +1,125 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +import os +import unittest + +from nimbusml import FileDataStream +from nimbusml import Pipeline +from nimbusml.datasets import get_dataset +from nimbusml.ensemble import LightGbmRanker +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import LogisticRegressionBinaryClassifier, \ + FastLinearClassifier, FastLinearRegressor +from nimbusml.preprocessing import ToKey +from numpy.testing import assert_almost_equal +from pandas.testing import assert_frame_equal + +class TestPermutationFeatureImportance(unittest.TestCase): + + @classmethod + def setUpClass(self): + adult_path = get_dataset('uciadult_train').as_filepath() + self.classification_data = FileDataStream.read_csv(adult_path) + binary_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + LogisticRegressionBinaryClassifier( + feature=['age', 'education'], label='label', + number_of_threads=1)]) + self.binary_model = binary_pipeline.fit(self.classification_data) + self.binary_pfi = self.binary_model.permutation_feature_importance(self.classification_data) + classifier_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + FastLinearClassifier(feature=['age', 'education'], label='label', + number_of_threads=1, shuffle=False)]) + self.classifier_model = classifier_pipeline.fit(self.classification_data) + self.classifier_pfi = self.classifier_model.permutation_feature_importance(self.classification_data) + + infert_path = get_dataset('infert').as_filepath() + self.regression_data = FileDataStream.read_csv(infert_path) + regressor_pipeline = Pipeline([ + OneHotVectorizer(columns=['education']), + FastLinearRegressor(feature=['induced', 'education'], label='age', + number_of_threads=1, shuffle=False)]) + self.regressor_model = regressor_pipeline.fit(self.regression_data) + self.regressor_pfi = self.regressor_model.permutation_feature_importance(self.regression_data) + + ticket_path = get_dataset('gen_tickettrain').as_filepath() + self.ranking_data = FileDataStream.read_csv(ticket_path) + ranker_pipeline = Pipeline([ + ToKey(columns=['group']), + LightGbmRanker(feature=['Class', 'dep_day', 'duration'], + label='rank', group_id='group', + random_state=0, number_of_threads=1)]) + self.ranker_model = ranker_pipeline.fit(self.ranking_data) + self.ranker_pfi = self.ranker_model.permutation_feature_importance(self.ranking_data) + + def test_binary_classifier(self): + assert_almost_equal(self.binary_pfi['AreaUnderRocCurve'].sum(), -0.140824, 6) + assert_almost_equal(self.binary_pfi['PositivePrecision'].sum(), -0.482143, 6) + assert_almost_equal(self.binary_pfi['PositiveRecall'].sum(), -0.0695652, 6) + assert_almost_equal(self.binary_pfi['NegativePrecision'].sum(), -0.0139899, 6) + assert_almost_equal(self.binary_pfi['NegativeRecall'].sum(), -0.00779221, 6) + assert_almost_equal(self.binary_pfi['F1Score'].sum(), -0.126983, 6) + assert_almost_equal(self.binary_pfi['AreaUnderPrecisionRecallCurve'].sum(), -0.19365, 5) + + def 
test_binary_classifier_from_loaded_model(self): + model_path = "model.zip" + self.binary_model.save_model(model_path) + loaded_model = Pipeline() + loaded_model.load_model(model_path) + pfi_from_loaded = loaded_model.permutation_feature_importance(self.classification_data) + assert_frame_equal(self.binary_pfi, pfi_from_loaded) + os.remove(model_path) + + def test_clasifier(self): + assert_almost_equal(self.classifier_pfi['MacroAccuracy'].sum(), -0.0256352, 6) + assert_almost_equal(self.classifier_pfi['LogLoss'].sum(), 0.158811, 6) + assert_almost_equal(self.classifier_pfi['LogLossReduction'].sum(), -0.29449, 5) + assert_almost_equal(self.classifier_pfi['PerClassLogLoss.0'].sum(), 0.0808459, 6) + assert_almost_equal(self.classifier_pfi['PerClassLogLoss.1'].sum(), 0.419826, 6) + + def test_classifier_from_loaded_model(self): + model_path = "model.zip" + self.classifier_model.save_model(model_path) + loaded_model = Pipeline() + loaded_model.load_model(model_path) + pfi_from_loaded = loaded_model.permutation_feature_importance(self.classification_data) + assert_frame_equal(self.classifier_pfi, pfi_from_loaded) + os.remove(model_path) + + def test_regressor(self): + assert_almost_equal(self.regressor_pfi['MeanAbsoluteError'].sum(), 0.504701, 6) + assert_almost_equal(self.regressor_pfi['MeanSquaredError'].sum(), 5.59277, 5) + assert_almost_equal(self.regressor_pfi['RootMeanSquaredError'].sum(), 0.553048, 6) + assert_almost_equal(self.regressor_pfi['RSquared'].sum(), -0.203612, 6) + + def test_regressor_from_loaded_model(self): + model_path = "model.zip" + self.regressor_model.save_model(model_path) + loaded_model = Pipeline() + loaded_model.load_model(model_path) + pfi_from_loaded = loaded_model.permutation_feature_importance(self.regression_data) + assert_frame_equal(self.regressor_pfi, pfi_from_loaded) + os.remove(model_path) + + def test_ranker(self): + assert_almost_equal(self.ranker_pfi['DCG@1'].sum(), -2.16404, 5) + assert_almost_equal(self.ranker_pfi['DCG@2'].sum(), -3.5294, 4) + assert_almost_equal(self.ranker_pfi['DCG@3'].sum(), -4.9721, 4) + assert_almost_equal(self.ranker_pfi['NDCG@1'].sum(), -0.114286, 6) + assert_almost_equal(self.ranker_pfi['NDCG@2'].sum(), -0.198631, 6) + assert_almost_equal(self.ranker_pfi['NDCG@3'].sum(), -0.236544, 6) + + def test_ranker_from_loaded_model(self): + model_path = "model.zip" + self.ranker_model.save_model(model_path) + loaded_model = Pipeline() + loaded_model.load_model(model_path) + pfi_from_loaded = loaded_model.permutation_feature_importance(self.ranking_data) + assert_frame_equal(self.ranker_pfi, pfi_from_loaded) + os.remove(model_path) + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py index f16e43aa..f1fc2ec7 100644 --- a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py @@ -11,7 +11,10 @@ from nimbusml.datasets import get_dataset from nimbusml.feature_extraction.categorical import OneHotVectorizer from nimbusml.linear_model import LogisticRegressionBinaryClassifier, OnlineGradientDescentRegressor +from nimbusml.multiclass import OneVsRestClassifier from nimbusml.preprocessing.filter import RangeFilter +from nimbusml.preprocessing import DatasetTransformer +from nimbusml.preprocessing.schema import PrefixColumnConcatenator seed = 0 @@ -406,6 +409,76 @@ def test_combine_with_classifier_trained_with_filedatastream(self): 
self.assertTrue(result_1.equals(result_2)) + def test_combined_models_support_predict_proba(self): + path = get_dataset('infert').as_filepath() + + data = FileDataStream.read_csv(path) + + transform = OneHotVectorizer(columns={'edu': 'education'}) + df = transform.fit_transform(data, as_binary_data_stream=True) + + feature_cols = ['parity', 'edu', 'age', 'induced', 'spontaneous', 'stratum', 'pooled.stratum'] + predictor = LogisticRegressionBinaryClassifier(feature=feature_cols, label='case') + predictor.fit(df) + + data = FileDataStream.read_csv(path) + df = transform.transform(data, as_binary_data_stream=True) + result_1 = predictor.predict_proba(df) + + data = FileDataStream.read_csv(path) + combined_pipeline = Pipeline.combine_models(transform, predictor) + result_2 = combined_pipeline.predict_proba(data) + + self.assertTrue(np.array_equal(result_1, result_2)) + + + def test_combined_models_support_predict_proba_with_more_than_2_classes(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path) + + featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})]) + featurization_pipeline.fit(data) + featurized_data = featurization_pipeline.transform(data) + + feature_cols = ['education', 'age'] + training_pipeline = Pipeline([DatasetTransformer(featurization_pipeline.model), OneVsRestClassifier(LogisticRegressionBinaryClassifier(), feature=feature_cols, label='induced')]) + training_pipeline.fit(data, output_predictor_model=True) + + concat_pipeline = Pipeline([PrefixColumnConcatenator({'education': 'education.'})]) + concat_pipeline.fit(featurized_data) + + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(training_pipeline.predictor_model) + + concat_and_predictor_pipeline = Pipeline.combine_models(concat_pipeline, predictor_pipeline) + + result = concat_and_predictor_pipeline.predict_proba(featurized_data) + self.assertEqual(result.shape[1], 3) + + + def test_combined_models_support_decision_function(self): + path = get_dataset('infert').as_filepath() + + data = FileDataStream.read_csv(path) + + transform = OneHotVectorizer(columns={'edu': 'education'}) + df = transform.fit_transform(data, as_binary_data_stream=True) + + feature_cols = ['parity', 'edu', 'age', 'induced', 'spontaneous', 'stratum', 'pooled.stratum'] + predictor = LogisticRegressionBinaryClassifier(feature=feature_cols, label='case') + predictor.fit(df) + + data = FileDataStream.read_csv(path) + df = transform.transform(data, as_binary_data_stream=True) + result_1 = predictor.decision_function(df) + + data = FileDataStream.read_csv(path) + combined_pipeline = Pipeline.combine_models(transform, predictor) + result_2 = combined_pipeline.decision_function(data) + + self.assertTrue(np.array_equal(result_1, result_2)) + + if __name__ == '__main__': unittest.main() diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_get_schema.py b/src/python/nimbusml/tests/pipeline/test_pipeline_get_schema.py new file mode 100644 index 00000000..63bb5310 --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_get_schema.py @@ -0,0 +1,67 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------------------------- +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import OnlineGradientDescentRegressor +from nimbusml.preprocessing.filter import RangeFilter + +train_data = {'c0': ['a', 'b', 'a', 'b'], + 'c1': [1, 2, 3, 4], + 'c2': [2, 3, 4, 5]} +train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + +class TestPipelineGetSchema(unittest.TestCase): + + def test_get_schema_returns_correct_value_for_single_valued_columns(self): + df = train_df.drop(['c0'], axis=1) + + pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2']) + pipeline.fit(df) + df = pipeline.transform(df) + + schema = pipeline.get_output_columns() + + self.assertTrue('c1' in schema) + self.assertTrue('c2' in schema) + + self.assertEqual(len(schema), 2) + + def test_get_schema_returns_correct_value_for_vector_valued_columns(self): + pipeline = Pipeline([OneHotVectorizer() << 'c0']) + pipeline.fit(train_df) + + schema = pipeline.get_output_columns() + + self.assertTrue('c0.a' in schema) + self.assertTrue('c0.b' in schema) + self.assertTrue('c1' in schema) + self.assertTrue('c2' in schema) + + self.assertEqual(len(schema), 4) + + def test_get_schema_does_not_work_when_predictor_is_part_of_model(self): + df = train_df.drop(['c0'], axis=1) + + pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')]) + pipeline.fit(df) + + try: + schema = pipeline.get_output_columns() + except Exception as e: + pass + else: + self.fail() + + +if __name__ == '__main__': + unittest.main() + diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_split_models.py b/src/python/nimbusml/tests/pipeline/test_pipeline_split_models.py new file mode 100644 index 00000000..bc1399bf --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_split_models.py @@ -0,0 +1,172 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- +import os +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import LogisticRegressionBinaryClassifier, OnlineGradientDescentRegressor +from nimbusml.preprocessing.filter import RangeFilter +from nimbusml.preprocessing.schema import ColumnConcatenator, PrefixColumnConcatenator + +seed = 0 + +train_data = {'c0': ['a', 'b', 'a', 'b'], + 'c1': [1, 2, 3, 4], + 'c2': [2, 3, 4, 5]} +train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + +test_data = {'c0': ['a', 'b', 'b'], + 'c1': [1.5, 2.3, 3.7], + 'c2': [2.2, 4.9, 2.7]} +test_df = pd.DataFrame(test_data).astype({'c1': np.float64, + 'c2': np.float64}) + + +class TestPipelineSplitModels(unittest.TestCase): + + def test_notvectorized_output_predictor_model(self): + """ + This test verifies that outputted predictor model from + combined (with featurizers) pipeline runs successfully + on featurized data with no vectors. 
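+        Because RangeFilter only filters rows and leaves the scalar columns unchanged,
+        the predictor model's input schema matches the transformed data directly
+        (contrast with the vectorized tests below, which need a concatenator).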
+ """ + df = train_df.drop(['c0'], axis=1) + + # Create and fit a RangeFilter transform using the training + # data and use it to transform the training data. + transform_pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2'], random_state=seed) + transform_pipeline.fit(df) + df1 = transform_pipeline.transform(df) + + # Create and fit a combined model and spit out predictor model + combined_pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2', + OnlineGradientDescentRegressor(label='c2')], + random_state=seed) + combined_pipeline.fit(df, output_predictor_model=True) + result_1 = combined_pipeline.predict(df) + + # Load predictor pipeline and score featurized data + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(combined_pipeline.predictor_model) + result_2 = predictor_pipeline.predict(df1) + + self.assertEqual(result_1.loc[0, 'Score'], result_2.loc[0, 'Score']) + self.assertEqual(result_1.loc[1, 'Score'], result_2.loc[1, 'Score']) + + def test_vectorized_output_predictor_model(self): + """ + This test shows that outputted predictor model from + combined (with featurizers) pipeline fails to run + on featurized data with vectors. + """ + + # Create and fit a OneHotVectorizer transform using the + # training data and use it to transform the training data. + transform_pipeline = Pipeline([OneHotVectorizer() << 'c0'], random_state=seed) + transform_pipeline.fit(train_df) + df = transform_pipeline.transform(train_df) + + # Create and fit a combined model and spit out predictor model + combined_pipeline = Pipeline([OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2')], + random_state=seed) + combined_pipeline.fit(train_df, output_predictor_model=True) + result_1 = combined_pipeline.predict(train_df) + + # Load predictor pipeline and score featurized data + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(combined_pipeline.predictor_model) + + try: + # This does not work because the input schema doesnt + # match. Input schema looks for vector 'c0' with slots 'a,b' + # but featurized data has only columns 'c0.a' and 'c0.b' + predictor_pipeline.predict(df) + + except Exception as e: + pass + else: + self.fail() + + def test_vectorized_with_concat_output_predictor_model(self): + """ + This test shows how to prepend ColumnConcatenator transform + to outputted predictor model from combined (with featurizers) pipeline + so it successfully runs on featurized data with vectors. + """ + # Create and fit a OneHotVectorizer transform using the + # training data and use it to transform the training data. + transform_pipeline = Pipeline([OneHotVectorizer() << 'c0'], random_state=seed) + transform_pipeline.fit(train_df) + df = transform_pipeline.transform(train_df) + + # Create, fit and score with combined model. + # Output predictor model separately. 
+ combined_pipeline = Pipeline([OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2')], + random_state=seed) + combined_pipeline.fit(train_df, output_predictor_model=True) + result_1 = combined_pipeline.predict(train_df) + + # train ColumnConcatenator on featurized data + concat_pipeline = Pipeline([ColumnConcatenator(columns={'c0': ['c0.a', 'c0.b']})]) + concat_pipeline.fit(df) + + # Load predictor pipeline + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(combined_pipeline.predictor_model) + + # combine concat and predictor models and score + combined_predictor_pipeline = Pipeline.combine_models(concat_pipeline, + predictor_pipeline) + result_2 = combined_predictor_pipeline.predict(df) + + self.assertEqual(result_1.loc[0, 'Score'], result_2.loc[0, 'Score']) + self.assertEqual(result_1.loc[1, 'Score'], result_2.loc[1, 'Score']) + + def test_vectorized_with_prefixconcat_output_predictor_model(self): + """ + This test shows how to prepend PrefixColumnConcatenator transform + to outputted predictor model from combined (with featurizers) pipeline + so it successfully runs on featurized data with vectors. + """ + # Create and fit a OneHotVectorizer transform using the + # training data and use it to transform the training data. + transform_pipeline = Pipeline([OneHotVectorizer() << 'c0'], random_state=seed) + transform_pipeline.fit(train_df) + df = transform_pipeline.transform(train_df) + + # Create, fit and score with combined model. + # Output predictor model separately. + combined_pipeline = Pipeline([OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2')], + random_state=seed) + combined_pipeline.fit(train_df, output_predictor_model=True) + result_1 = combined_pipeline.predict(train_df) + + # train PrefixColumnConcatenator on featurized data + concat_pipeline = Pipeline([PrefixColumnConcatenator(columns={'c0': 'c0.'})]) + concat_pipeline.fit(df) + + # Load predictor pipeline + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(combined_pipeline.predictor_model) + + # combine concat and predictor models and score + combined_predictor_pipeline = Pipeline.combine_models(concat_pipeline, + predictor_pipeline) + result_2 = combined_predictor_pipeline.predict(df) + + self.assertEqual(result_1.loc[0, 'Score'], result_2.loc[0, 'Score']) + self.assertEqual(result_1.loc[1, 'Score'], result_2.loc[1, 'Score']) + +if __name__ == '__main__': + unittest.main() + diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_transform_method.py b/src/python/nimbusml/tests/pipeline/test_pipeline_transform_method.py new file mode 100644 index 00000000..e16a1e99 --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_transform_method.py @@ -0,0 +1,26 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License.
+# -------------------------------------------------------------------------------------------- +import unittest + +import pandas +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.text import NGramFeaturizer + +path = get_dataset("wiki_detox_train").as_filepath() +data = FileDataStream.read_csv(path, sep='\t') +df = data.to_df().head() +X = df['SentimentText'] + +class TestPipelineTransformMethod(unittest.TestCase): + + def test_transform_only_pipeline_transform_method(self): + p = Pipeline([NGramFeaturizer(char_feature_extractor=None) << 'SentimentText']) + p.fit(X) + xf = p.transform(X) + assert 'SentimentText.==rude==' in xf.columns + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/pipeline/test_predict_proba_decision_function.py b/src/python/nimbusml/tests/pipeline/test_predict_proba_decision_function.py index f6cc1c70..138622b4 100644 --- a/src/python/nimbusml/tests/pipeline/test_predict_proba_decision_function.py +++ b/src/python/nimbusml/tests/pipeline/test_predict_proba_decision_function.py @@ -209,12 +209,12 @@ def test_pass_predict_proba_from_load_model(selfs): class TestDecisionFunction(unittest.TestCase): def test_pass_decision_function_binary(self): assert_almost_equal(decfun_sum(FactorizationMachineBinaryClassifier( - )), -32.618393, decimal=5, err_msg=invalid_decision_function_output) + )), -30.2316, decimal=4, err_msg=invalid_decision_function_output) def test_pass_decision_function_binary_with_pipeline(self): assert_almost_equal( decfun_sum(Pipeline([FactorizationMachineBinaryClassifier( - )])), -32.618393, decimal=5, + )])), -30.2316, decimal=4, err_msg=invalid_decision_function_output) def test_pass_decision_function_multiclass(self): diff --git a/src/python/nimbusml/tests/pipeline/test_uci_adult.py b/src/python/nimbusml/tests/pipeline/test_uci_adult.py index 990f0b72..00ae1728 100644 --- a/src/python/nimbusml/tests/pipeline/test_uci_adult.py +++ b/src/python/nimbusml/tests/pipeline/test_uci_adult.py @@ -6,14 +6,15 @@ import tempfile import unittest +import numpy as np from nimbusml import FileDataStream from nimbusml import Pipeline from nimbusml.datasets import get_dataset from nimbusml.ensemble import FastTreesBinaryClassifier from nimbusml.feature_extraction.categorical import OneHotVectorizer from nimbusml.linear_model import FastLinearBinaryClassifier -from nimbusml.utils import check_accuracy, get_X_y -from sklearn.utils.testing import assert_raises_regex, assert_equal, assert_true +from nimbusml.utils import get_X_y +from sklearn.utils.testing import assert_raises_regex, assert_equal, assert_true, assert_greater train_file = get_dataset("uciadult_train").as_filepath() test_file = get_dataset("uciadult_test").as_filepath() @@ -32,6 +33,15 @@ 'col=sex:TX:7 col=native-country-region:TX:8 header+' label_column = 'label' +def check_accuracy(test_file, label_column, predictions, threshold, sep=','): + (test, label) = get_X_y(test_file, label_column, sep=sep) + accuracy = np.mean(label[label_column].values == + predictions.ix[:, 'PredictedLabel'].values) + assert_greater( + accuracy, + threshold, + "accuracy should be greater than %s" % + threshold) class TestUciAdult(unittest.TestCase): @@ -173,15 +183,5 @@ def test_experiment_loadsavemodel(self): sum2, "model metrics don't match after loading model") - def test_parallel(self): - (train, label) = get_X_y(train_file, label_column, sep=',') - cat = OneHotVectorizer() << categorical_columns - ftree = 
FastTreesBinaryClassifier() - pipeline = Pipeline([cat, ftree]) - - result = pipeline.fit(train, label, parallel=8) - result2 = pipeline.fit(train, label, parallel=1) - assert_true(result == result2) - if __name__ == '__main__': unittest.main() diff --git a/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py b/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py index fb0bdc79..0dc85f6e 100644 --- a/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py +++ b/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py @@ -98,7 +98,8 @@ def test_input_conversion_to_float(self): assert_equal(result.loc[2, 'f5'], True) result.loc[2, 'f5'] = False result = ~result - self.assertTrue(result.all(axis=None)) + for val in result.all().tolist(): + self.assertTrue(val) # Check Filter xf = Filter() diff --git a/src/python/nimbusml/tests/preprocessing/normalization/test_lpscaler.py b/src/python/nimbusml/tests/preprocessing/normalization/test_lpscaler.py new file mode 100644 index 00000000..94f7d1bd --- /dev/null +++ b/src/python/nimbusml/tests/preprocessing/normalization/test_lpscaler.py @@ -0,0 +1,70 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- + +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline +from nimbusml.preprocessing.normalization import LpScaler +from nimbusml.preprocessing.schema import ColumnConcatenator +from sklearn.utils.testing import assert_greater, assert_less + + +class TestLpScaler(unittest.TestCase): + + def test_lpscaler(self): + in_df = pd.DataFrame( + data=dict( + Sepal_Length=[2.5, 1, 2.1, 1.0], + Sepal_Width=[.75, .9, .8, .76], + Petal_Length=[0, 2.5, 2.6, 2.4], + Species=["setosa", "viginica", "setosa", 'versicolor'])) + + in_df.iloc[:, 0:3] = in_df.iloc[:, 0:3].astype(np.float32) + + src_cols = ['Sepal_Length', 'Sepal_Width', 'Petal_Length'] + + pipeline = Pipeline([ + ColumnConcatenator() << {'concat': src_cols}, + LpScaler() << {'norm': 'concat'} + ]) + out_df = pipeline.fit_transform(in_df) + + cols = ['concat.' + s for s in src_cols] + cols.extend(['norm.' + s for s in src_cols]) + sum = out_df[cols].sum().sum() + sum_range = (23.24, 23.25) + assert_greater(sum, sum_range[0], "sum should be greater than %s" % sum_range[0]) + assert_less(sum, sum_range[1], "sum should be less than %s" % sum_range[1]) + + def test_lpscaler_automatically_converts_to_single(self): + in_df = pd.DataFrame( + data=dict( + Sepal_Length=[2.5, 1, 2.1, 1.0], + Sepal_Width=[.75, .9, .8, .76], + Petal_Length=[0, 2.5, 2.6, 2.4], + Species=["setosa", "viginica", "setosa", 'versicolor'])) + + in_df.iloc[:, 0:3] = in_df.iloc[:, 0:3].astype(np.float64) + + src_cols = ['Sepal_Length', 'Sepal_Width', 'Petal_Length'] + + pipeline = Pipeline([ + ColumnConcatenator() << {'concat': src_cols}, + LpScaler() << {'norm': 'concat'} + ]) + out_df = pipeline.fit_transform(in_df) + + cols = ['concat.' + s for s in src_cols] + cols.extend(['norm.' 
+ s for s in src_cols]) + sum = out_df[cols].sum().sum() + sum_range = (23.24, 23.25) + assert_greater(sum, sum_range[0], "sum should be greater than %s" % sum_range[0]) + assert_less(sum, sum_range[1], "sum should be less than %s" % sum_range[1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/preprocessing/schema/test_prefixcolumnconcatenator.py b/src/python/nimbusml/tests/preprocessing/schema/test_prefixcolumnconcatenator.py new file mode 100644 index 00000000..75471be3 --- /dev/null +++ b/src/python/nimbusml/tests/preprocessing/schema/test_prefixcolumnconcatenator.py @@ -0,0 +1,36 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- + +import unittest + +from nimbusml import FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.preprocessing.schema import PrefixColumnConcatenator + + +class TestPrefixColumnConcatenator(unittest.TestCase): + + def test_prefix_columns_concatenator(self): + data = get_dataset('iris').as_df() + xf = PrefixColumnConcatenator(columns={'Spl': 'Sepal_', 'Pet': 'Petal_' }) + features = xf.fit_transform(data) + + assert features.shape == (150, 11) + assert set(features.columns) == { + 'Sepal_Length', + 'Sepal_Width', + 'Petal_Length', + 'Petal_Width', + 'Label', + 'Species', + 'Setosa', + 'Spl.Sepal_Length', + 'Spl.Sepal_Width', + 'Pet.Petal_Length', + 'Pet.Petal_Width'} + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/preprocessing/test_datasettransformer.py b/src/python/nimbusml/tests/preprocessing/test_datasettransformer.py new file mode 100644 index 00000000..197119c6 --- /dev/null +++ b/src/python/nimbusml/tests/preprocessing/test_datasettransformer.py @@ -0,0 +1,184 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------------------------- +import os +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline, FileDataStream +from nimbusml.datasets import get_dataset +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.linear_model import LogisticRegressionBinaryClassifier, OnlineGradientDescentRegressor +from nimbusml.preprocessing import DatasetTransformer +from nimbusml.preprocessing.filter import RangeFilter +from nimbusml import FileDataStream + +seed = 0 + +train_data = {'c0': ['a', 'b', 'a', 'b'], + 'c1': [1, 2, 3, 4], + 'c2': [2, 3, 4, 5]} +train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + +test_data = {'c0': ['a', 'b', 'b'], + 'c1': [1.5, 2.3, 3.7], + 'c2': [2.2, 4.9, 2.7]} +test_df = pd.DataFrame(test_data).astype({'c1': np.float64, + 'c2': np.float64}) + + +class TestDatasetTransformer(unittest.TestCase): + + def test_same_schema_with_dataframe_input(self): + train_df_updated = train_df.drop(['c0'], axis=1) + test_df_updated = test_df.drop(['c0'], axis=1) + + rf_max = 4.5 + + # Create reference pipeline + std_pipeline = Pipeline([ + RangeFilter(min=0.0, max=rf_max) << 'c2', + OnlineGradientDescentRegressor(label='c2', feature=['c1']) + ], random_state=seed) + + std_pipeline.fit(train_df_updated) + result_1 = std_pipeline.predict(test_df_updated) + + # Create combined pipeline + transform_pipeline = Pipeline([RangeFilter(min=0.0, max=rf_max) << 'c2']) + transform_pipeline.fit(train_df_updated) + + combined_pipeline = Pipeline([ + DatasetTransformer(transform_model=transform_pipeline.model), + OnlineGradientDescentRegressor(label='c2', feature=['c1']) + ], random_state=seed) + combined_pipeline.fit(train_df_updated) + + os.remove(transform_pipeline.model) + + result_2 = combined_pipeline.predict(test_df_updated) + + self.assertTrue(result_1.equals(result_2)) + + + def test_different_schema_with_dataframe_input(self): + # Create reference pipeline + std_pipeline = Pipeline([ + OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + + std_pipeline.fit(train_df) + result_1 = std_pipeline.predict(test_df) + + # Create combined pipeline + transform_pipeline = Pipeline([OneHotVectorizer() << 'c0'], random_state=seed) + transform_pipeline.fit(train_df) + + combined_pipeline = Pipeline([ + DatasetTransformer(transform_model=transform_pipeline.model), + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + combined_pipeline.fit(train_df) + + os.remove(transform_pipeline.model) + + result_2 = combined_pipeline.predict(test_df) + + self.assertTrue(result_1.equals(result_2)) + + + def test_different_schema_with_filedatastream_input(self): + train_filename = "train-data.csv" + train_df.to_csv(train_filename, index=False, header=True) + train_data_stream = FileDataStream.read_csv(train_filename, sep=',', header=True) + + test_filename = "test-data.csv" + test_df.to_csv(test_filename, index=False, header=True) + test_data_stream = FileDataStream.read_csv(test_filename, sep=',', header=True) + + # Create reference pipeline + std_pipeline = Pipeline([ + OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + + std_pipeline.fit(train_data_stream) + result_1 = std_pipeline.predict(test_data_stream) + + # Create combined pipeline + transform_pipeline = Pipeline([OneHotVectorizer() 
<< 'c0'], random_state=seed) + transform_pipeline.fit(train_data_stream) + + combined_pipeline = Pipeline([ + DatasetTransformer(transform_model=transform_pipeline.model), + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + combined_pipeline.fit(train_data_stream) + + os.remove(transform_pipeline.model) + + result_2 = combined_pipeline.predict(test_data_stream) + + self.assertTrue(result_1.equals(result_2)) + + os.remove(train_filename) + os.remove(test_filename) + + + def test_combining_two_dataset_transformers(self): + rf_max = 4.5 + + # Create reference pipeline + std_pipeline = Pipeline([ + RangeFilter(min=0.0, max=rf_max) << 'c2', + OneHotVectorizer() << 'c0', + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + + std_pipeline.fit(train_df) + result_1 = std_pipeline.predict(test_df) + + # Create combined pipeline + transform_pipeline1 = Pipeline([RangeFilter(min=0.0, max=rf_max) << 'c2']) + transform_pipeline1.fit(train_df) + + transform_pipeline2 = Pipeline([OneHotVectorizer() << 'c0'], random_state=seed) + transform_pipeline2.fit(train_df) + + combined_pipeline = Pipeline([ + DatasetTransformer(transform_model=transform_pipeline1.model), + DatasetTransformer(transform_model=transform_pipeline2.model), + OnlineGradientDescentRegressor(label='c2', feature=['c0', 'c1']) + ], random_state=seed) + combined_pipeline.fit(train_df) + + os.remove(transform_pipeline1.model) + os.remove(transform_pipeline2.model) + + result_2 = combined_pipeline.predict(test_df) + + self.assertTrue(result_1.equals(result_2)) + + + def test_get_fit_info(self): + transform_pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2']) + transform_pipeline.fit(train_df) + + combined_pipeline = Pipeline([ + DatasetTransformer(transform_model=transform_pipeline.model), + OnlineGradientDescentRegressor(label='c2', feature=['c1']) + ], random_state=seed) + combined_pipeline.fit(train_df) + + info = combined_pipeline.get_fit_info(train_df) + + self.assertTrue(info[0][1]['name'] == 'DatasetTransformer') + + +if __name__ == '__main__': + unittest.main() + diff --git a/src/python/nimbusml/tests/preprocessing/text/test_wordtokenizer.py b/src/python/nimbusml/tests/preprocessing/text/test_wordtokenizer.py new file mode 100644 index 00000000..a8c66016 --- /dev/null +++ b/src/python/nimbusml/tests/preprocessing/text/test_wordtokenizer.py @@ -0,0 +1,33 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------------------------- +import unittest + +import pandas +from nimbusml import Pipeline +from nimbusml.preprocessing.text import WordTokenizer + + +class TestWordTokenizer(unittest.TestCase): + + def test_wordtokenizer(self): + customer_reviews = pandas.DataFrame(data=dict(review=[ + "I really did not like the taste of it", + "It was surprisingly quite good!"])) + + tokenize = WordTokenizer(char_array_term_separators=[" ", "n"]) << 'review' + pipeline = Pipeline([tokenize]) + + tokenize.fit(customer_reviews) + y = tokenize.transform(customer_reviews) + + self.assertEqual(y.shape, (2, 9)) + + self.assertEqual(y.loc[0, 'review.3'], 'ot') + self.assertEqual(y.loc[1, 'review.3'], 'gly') + self.assertEqual(y.loc[1, 'review.6'], None) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/scikit/test_uci_adult_scikit.py b/src/python/nimbusml/tests/scikit/test_uci_adult_scikit.py index a08ce3b9..037987db 100644 --- a/src/python/nimbusml/tests/scikit/test_uci_adult_scikit.py +++ b/src/python/nimbusml/tests/scikit/test_uci_adult_scikit.py @@ -16,14 +16,14 @@ from nimbusml.linear_model import FastLinearBinaryClassifier from nimbusml.linear_model import LogisticRegressionClassifier from nimbusml.preprocessing.normalization import MeanVarianceScaler -from nimbusml.utils import check_accuracy_scikit, get_X_y +from nimbusml.utils import get_X_y from sklearn.base import clone from sklearn.datasets import load_iris from sklearn.decomposition import PCA from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.preprocessing import OneHotEncoder -from sklearn.utils.testing import assert_equal +from sklearn.utils.testing import assert_equal, assert_greater try: from pandas.testing import assert_frame_equal @@ -45,6 +45,19 @@ 'native-country-region'] selected_features = ['age', 'education-num'] +def check_accuracy_scikit( + test_file, + label_column, + predictions, + threshold, + sep=','): + (test, label) = get_X_y(test_file, label_column, sep=sep) + accuracy = np.mean(label[label_column].values == predictions.values) + assert_greater( + accuracy, + threshold, + "accuracy should be greater than %s" % + threshold) class TestUciAdultScikit(unittest.TestCase): diff --git a/src/python/nimbusml/tests/test_csr_matrix_output.py b/src/python/nimbusml/tests/test_csr_matrix_output.py new file mode 100644 index 00000000..f4909906 --- /dev/null +++ b/src/python/nimbusml/tests/test_csr_matrix_output.py @@ -0,0 +1,184 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------------------------- + +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline +from nimbusml.feature_extraction.categorical import OneHotVectorizer +from nimbusml.preprocessing.schema import ColumnConcatenator, ColumnDropper +from scipy.sparse import csr_matrix + + +class TestCsrMatrixOutput(unittest.TestCase): + + def test_column_dropped_output_produces_expected_result(self): + train_data = {'c1': [1, 0, 0, 4], + 'c2': [2, 3, 0, 5], + 'c3': [3, 4, 5, 6]} + train_df = pd.DataFrame(train_data).astype(np.float32) + + xf = ColumnDropper(columns=['c3']) + xf.fit(train_df) + result = xf.transform(train_df, as_csr=True) + + self.assertEqual(result.nnz, 5) + self.assertTrue(type(result) == csr_matrix) + result = pd.DataFrame(result.todense()) + + train_data = {0: [1, 0, 0, 4], + 1: [2, 3, 0, 5]} + expected_result = pd.DataFrame(train_data).astype(np.float32) + + self.assertTrue(result.equals(expected_result)) + + def test_fit_transform_produces_expected_result(self): + train_data = {'c1': [1, 0, 0, 4], + 'c2': [2, 3, 0, 5], + 'c3': [3, 4, 5, 6]} + train_df = pd.DataFrame(train_data).astype(np.float32) + + xf = ColumnDropper(columns=['c3']) + result = xf.fit_transform(train_df, as_csr=True) + + self.assertEqual(result.nnz, 5) + self.assertTrue(type(result) == csr_matrix) + result = pd.DataFrame(result.todense()) + + train_data = {0: [1, 0, 0, 4], + 1: [2, 3, 0, 5]} + expected_result = pd.DataFrame(train_data).astype(np.float32) + + self.assertTrue(result.equals(expected_result)) + + def test_vector_column_combined_with_single_value_columns(self): + train_data = {'c1': [1, 0, 0, 4], + 'c2': [2, 3, 0, 5], + 'c3': [3, 4, 5, 6]} + train_df = pd.DataFrame(train_data).astype(np.float32) + + xf = ColumnConcatenator(columns={'features': ['c1', 'c2', 'c3']}) + xf.fit(train_df) + result = xf.transform(train_df, as_csr=True) + + self.assertEqual(result.nnz, 18) + self.assertTrue(type(result) == csr_matrix) + result = pd.DataFrame(result.todense()) + + train_data = {0: [1, 0, 0, 4], + 1: [2, 3, 0, 5], + 2: [3, 4, 5, 6], + 3: [1, 0, 0, 4], + 4: [2, 3, 0, 5], + 5: [3, 4, 5, 6]} + expected_result = pd.DataFrame(train_data).astype(np.float32) + self.assertTrue(result.equals(expected_result)) + + def test_sparse_vector_column(self): + train_data = {'c0': ['a', 'b', 'a', 'b'], + 'c1': ['c', 'd', 'd', 'c']} + train_df = pd.DataFrame(train_data) + + xf = OneHotVectorizer(columns={'c0':'c0', 'c1':'c1'}) + xf.fit(train_df) + expected_result = xf.transform(train_df) + self.assertTrue(type(expected_result) == pd.DataFrame) + + result = xf.transform(train_df, as_csr=True) + self.assertEqual(result.nnz, 8) + self.assertTrue(type(result) == csr_matrix) + + result = pd.DataFrame(result.todense(), columns=['c0.a', 'c0.b', 'c1.c', 'c1.d']) + + self.assertTrue(result.equals(expected_result)) + + def test_sparse_vector_column_combined_with_single_value_columns(self): + train_data = {'c0': [0, 1, 0, 3], + 'c1': ['a', 'b', 'a', 'b']} + train_df = pd.DataFrame(train_data).astype({'c0': np.float32}) + + xf = OneHotVectorizer(columns={'c1':'c1'}) + xf.fit(train_df) + expected_result = xf.transform(train_df) + self.assertTrue(type(expected_result) == pd.DataFrame) + + result = xf.transform(train_df, as_csr=True) + self.assertEqual(result.nnz, 6) + self.assertTrue(type(result) == csr_matrix) + + result = pd.DataFrame(result.todense(), columns=['c0', 'c1.a', 'c1.b']) + + self.assertTrue(result.equals(expected_result)) + + def 
test_types_convertable_to_r4_get_output_as_r4(self): + train_data = {'c1': [1, 0, 0, 4], + 'c2': [2, 3, 0, 5], + 'c3': [3, 4, 5, 6], + 'c4': [4, 5, 6, 7]} + train_df = pd.DataFrame(train_data).astype({'c1': np.ubyte, + 'c2': np.short, + 'c3': np.float32}) + + xf = ColumnDropper(columns=['c4']) + xf.fit(train_df) + result = xf.transform(train_df, as_csr=True) + + self.assertTrue(type(result) == csr_matrix) + self.assertEqual(result.nnz, 9) + result = pd.DataFrame(result.todense()) + + train_data = {0: [1, 0, 0, 4], + 1: [2, 3, 0, 5], + 2: [3, 4, 5, 6]} + expected_result = pd.DataFrame(train_data).astype(np.float32) + + self.assertTrue(result.equals(expected_result)) + + self.assertEqual(result.dtypes[0], np.float32) + self.assertEqual(result.dtypes[1], np.float32) + self.assertEqual(result.dtypes[2], np.float32) + + def test_types_convertable_to_r8_get_output_as_r8(self): + large_int64 = 372036854775807 + train_data = {'c1': [1, 0, 0, 4], + 'c2': [2, 3, 0, 5], + 'c3': [3, 0, 5, 0], + 'c4': [0, 5, 6, 7], + 'c5': [0, 5, 0, large_int64], + 'c6': [5, 6, 7, 8]} + train_df = pd.DataFrame(train_data).astype({'c1': np.ubyte, + 'c2': np.short, + 'c3': np.float32, + 'c4': np.float64, + 'c5': np.int64}) + + xf = ColumnDropper(columns=['c6']) + xf.fit(train_df) + result = xf.transform(train_df, as_csr=True) + + self.assertTrue(type(result) == csr_matrix) + self.assertEqual(result.nnz, 12) + result = pd.DataFrame(result.todense()) + + train_data = {0: [1, 0, 0, 4], + 1: [2, 3, 0, 5], + 2: [3, 0, 5, 0], + 3: [0, 5, 6, 7], + 4: [0, 5, 0, large_int64]} + expected_result = pd.DataFrame(train_data).astype(np.float64) + + self.assertTrue(result.equals(expected_result)) + + self.assertEqual(result.dtypes[0], np.float64) + self.assertEqual(result.dtypes[1], np.float64) + self.assertEqual(result.dtypes[2], np.float64) + self.assertEqual(result.dtypes[3], np.float64) + + self.assertEqual(result.loc[3, 4], large_int64) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/test_entrypoints.py b/src/python/nimbusml/tests/test_entrypoints.py index 257d5bef..c4e53546 100644 --- a/src/python/nimbusml/tests/test_entrypoints.py +++ b/src/python/nimbusml/tests/test_entrypoints.py @@ -11,7 +11,7 @@ from nimbusml.internal.entrypoints.transforms_twoheterogeneousmodelcombiner \ import \ transforms_twoheterogeneousmodelcombiner -from nimbusml.internal.utils.entrypoints import EntryPoint, Graph +from nimbusml.internal.utils.entrypoints import EntryPoint, Graph, DataOutputFormat # from imp import reload @@ -116,7 +116,7 @@ def test_logistic_regression_graph(self): graph = Graph( dict( input_data=""), dict( - output_model=""), False, *all_nodes) + output_model=""), DataOutputFormat.DF, *all_nodes) # print(graph) graph.run(X=None, dryrun=True) diff --git a/src/python/nimbusml/tests/test_errors.py b/src/python/nimbusml/tests/test_errors.py index df14baf2..744ac275 100644 --- a/src/python/nimbusml/tests/test_errors.py +++ b/src/python/nimbusml/tests/test_errors.py @@ -41,7 +41,7 @@ def test_error_wrong_column_name(self): raise Exception( 'boost.python did not replace the exception.\n{0}'.format( e)) - assert "Check the log for error messages" in str(e) + assert "Error: *** System.ArgumentOutOfRangeException: 'Could not find input column" in str(e) @unittest.skip("System.NullReferenceException") def test_char_tokenizer(self): diff --git a/src/python/nimbusml/tests/test_syntax_onehotvectorizer.py b/src/python/nimbusml/tests/test_syntax_onehotvectorizer.py index 556271af..c31879c1 100644 --- 
a/src/python/nimbusml/tests/test_syntax_onehotvectorizer.py +++ b/src/python/nimbusml/tests/test_syntax_onehotvectorizer.py @@ -79,7 +79,7 @@ def test_syntax5_failing(self): vec.fit_transform(X, verbose=2) assert False except RuntimeError as e: - assert "Returned code is -1. Check the log for error messages.." \ + assert "Error: *** System.ArgumentOutOfRangeException: 'Could not find input column" \ in str(e) vec = OneHotVectorizer() << {'edu1': ['education']} res = vec.fit_transform(X) @@ -147,3 +147,6 @@ def test_syntax9_multiple_inputs(self): 'out1': ['education1', 'education2']} output4 = ng4.fit_transform(X) assert output4.shape == (5, 13) + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/tests/test_variable_column.py b/src/python/nimbusml/tests/test_variable_column.py new file mode 100644 index 00000000..6c1fc8bd --- /dev/null +++ b/src/python/nimbusml/tests/test_variable_column.py @@ -0,0 +1,196 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------------------------- + +import unittest + +import numpy as np +import pandas as pd +from nimbusml import Pipeline +from nimbusml.internal.entrypoints.transforms_variablecolumn import transforms_variablecolumn +from nimbusml.internal.utils.entrypoints import Graph, DataOutputFormat + + +class TestVariableColumn(unittest.TestCase): + + def to_variable_column(self, input, features=None, length_column_name=None): + node = transforms_variablecolumn(data='$data', + output_data='$output_data', + features=features, + length_column_name=length_column_name) + + graph_nodes = [node] + graph = Graph(dict(data=''), + dict(output_data=''), + DataOutputFormat.DF, + *(graph_nodes)) + + (out_model, out_data, out_metrics, _) = graph.run(verbose=True, X=input) + return out_data + + def test_nonvariable_columns_are_returned_unchanged(self): + train_data = {'c1': [2, 3, 4, 5], + 'c2': [3, 4, 5, 6], + 'c3': [4, 5, 6, 7], + 'c4': [0, 1, 2, 1]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2']) + + self.assertTrue(result.loc[:, 'c3'].equals(train_df.loc[:, 'c3'])) + self.assertTrue(result.loc[:, 'c4'].equals(train_df.loc[:, 'c4'])) + + def test_variable_columns_of_same_length_do_not_add_nans(self): + train_data = {'c1': [2, 3, 4, 5], + 'c2': [3, 4, 5, 6], + 'c3': [4, 5, 6, 7]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2']) + + self.assertTrue(result.loc[:, 'c1.0'].equals(train_df.loc[:, 'c1'])) + self.assertTrue(result.loc[:, 'c1.1'].equals(train_df.loc[:, 'c2'])) + + def test_variable_columns_with_different_lengths_return_nans(self): + train_data = {'c1': [2, 3, 4, 5], + 'c2': [3, 4, 5, 6], + 'c3': [4, 5, 6, 7], + 'c4': [0, 1, 2, 1]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2'], 'c4') + + expectedC1 = pd.Series([np.nan, 3, 4, 5]).astype(np.float64) + expectedC2 = pd.Series([np.nan, np.nan, 5, np.nan]).astype(np.float64) + + self.assertTrue(result.loc[:, 'c1.0'].equals(expectedC1)) + self.assertTrue(result.loc[:, 'c1.1'].equals(expectedC2)) + + def 
test_variable_columns_with_different_lengths_return_nans_when_no_other_columns_are_present(self): + train_data = {'c1': [2, 3, 4, 5], + 'c2': [3, 4, 5, 6], + 'c3': [0, 1, 2, 1]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2'], 'c3') + + expectedC1 = pd.Series([np.nan, 3, 4, 5]).astype(np.float64) + expectedC2 = pd.Series([np.nan, np.nan, 5, np.nan]).astype(np.float64) + + self.assertEqual(len(result.columns), 2) + self.assertTrue(result.loc[:, 'c1.0'].equals(expectedC1)) + self.assertTrue(result.loc[:, 'c1.1'].equals(expectedC2)) + + def test_variable_columns_are_converted_to_float32(self): + """ + There are no integer nans so values that can be + converted to float32 are converted to support nans. + There is nullable integer type support in pandas but + it is currently marked as experimental and the docs + state that the api may change in the future. See + https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html + """ + types = [np.int8, np.int16, np.uint8, np.uint16, np.float32] + + for type in types: + train_data = {'c1': [2, 3, 4, 5], 'c2': [3, 4, 5, 6]} + train_df = pd.DataFrame(train_data).astype(type); + + result = self.to_variable_column(train_df, ['c1', 'c2']) + + self.assertEqual(str(result.dtypes[0]), 'float32') + self.assertEqual(str(result.dtypes[1]), 'float32') + + def test_variable_columns_are_converted_to_float64(self): + """ + There are no integer nans so values that can be + converted to float64 are converted to support nans. + There is nullable integer type support in pandas but + it is currently marked as experimental and the docs + state that the api may change in the future. See + https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html + """ + types = [np.int32, np.uint32, np.int64, np.uint64, np.float64] + + for type in types: + train_data = {'c1': [2, 3, 4, 5], 'c2': [3, 4, 5, 6]} + train_df = pd.DataFrame(train_data).astype(type); + + result = self.to_variable_column(train_df, ['c1', 'c2']) + + self.assertEqual(str(result.dtypes[0]), 'float64') + self.assertEqual(str(result.dtypes[1]), 'float64') + + def test_column_with_all_vector_lengths_of_zero_returns_one_column_filled_with_nans(self): + train_data = {'c1': [2, 3, 4, 5], + 'c2': [3, 4, 5, 6], + 'c3': [0, 0, 0, 0]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2'], 'c3') + + expectedC1 = pd.Series([np.nan, np.nan, np.nan, np.nan]).astype(np.float64) + + self.assertEqual(len(result.columns), 1) + self.assertTrue(result.loc[:, 'c1.0'].equals(expectedC1)) + + def test_variable_column_conversion_leaves_nans_untouched_if_they_already_exist_in_the_input(self): + train_data = {'c1': [2, 3, np.nan, 5], + 'c2': [3, np.nan, 5, 6], + 'c3': [2, 2, 2, 1]} + train_df = pd.DataFrame(train_data).astype({'c1': np.float64, + 'c2': np.float64}) + + result = self.to_variable_column(train_df, ['c1', 'c2'], 'c3') + + expectedC1 = pd.Series([2, 3, np.nan, 5]).astype(np.float64) + expectedC2 = pd.Series([3, np.nan, 5, np.nan]).astype(np.float64) + + self.assertEqual(len(result.columns), 2) + self.assertTrue(result.loc[:, 'c1.0'].equals(expectedC1)) + self.assertTrue(result.loc[:, 'c1.1'].equals(expectedC2)) + + def test_column_names_are_zero_padded(self): + numColsToVerify = [1, 2, 10, 11, 100, 101] + + for numCols in numColsToVerify: + inputColNames = ['c' + str(i) for i in range(numCols)] + train_data = {k: 
[2,3,4,5] for k in inputColNames} + train_df = pd.DataFrame(train_data).astype(np.float32); + + result = self.to_variable_column(train_df, inputColNames) + + maxDigits = len(inputColNames[-1]) - 1 + expectedColNames = ['c0.' + str(i).zfill(maxDigits) for i in range(numCols)] + + self.assertTrue(all(result.columns == expectedColNames)) + + def test_variable_column_of_type_string(self): + train_data = {'c1': ['a', 'b', '', 'd'], + 'c2': ['e', 'f', 'g', 'h'], + 'c3': [0, 1, 2, 1]} + train_df = pd.DataFrame(train_data) + + result = self.to_variable_column(train_df, ['c1', 'c2'], 'c3') + + self.assertEqual(result.loc[0, 'c1.0'], None) + self.assertEqual(result.loc[1, 'c1.0'], 'b') + self.assertEqual(result.loc[2, 'c1.0'], '') + self.assertEqual(result.loc[3, 'c1.0'], 'd') + + self.assertNotEqual(result.loc[2, 'c1.0'], None) + + self.assertEqual(result.loc[0, 'c1.1'], None) + self.assertEqual(result.loc[1, 'c1.1'], None) + self.assertEqual(result.loc[2, 'c1.1'], 'g') + self.assertEqual(result.loc[3, 'c1.1'], None) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/python/nimbusml/utils/__init__.py b/src/python/nimbusml/utils/__init__.py index 3243711a..1c85d629 100644 --- a/src/python/nimbusml/utils/__init__.py +++ b/src/python/nimbusml/utils/__init__.py @@ -1,5 +1,4 @@ -from .utils import get_X_y, evaluate_binary_classifier, check_accuracy, \ - check_accuracy_scikit, load_img, ColumnSelector +from .utils import get_X_y, evaluate_binary_classifier, load_img, ColumnSelector try: from inspect import signature @@ -9,8 +8,6 @@ __all__ = [ 'get_X_y', 'evaluate_binary_classifier', - 'check_accuracy', - 'check_accuracy_scikit', 'load_img', 'ColumnSelector', 'signature' diff --git a/src/python/nimbusml/utils/utils.py b/src/python/nimbusml/utils/utils.py index b9b33075..5e2e9fe6 100644 --- a/src/python/nimbusml/utils/utils.py +++ b/src/python/nimbusml/utils/utils.py @@ -12,12 +12,9 @@ import pandas as pd from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import roc_auc_score -from sklearn.utils.testing import assert_greater - - -# select columns from DataFrame insize a pipeline +# select columns from DataFrame inside a pipeline class ColumnSelector(BaseEstimator, TransformerMixin): def __init__(self, columns, ravel=False): self.columns = columns @@ -106,30 +103,4 @@ def evaluate_binary_classifier(target, predicted, probabilities=None): auc_score = None if probabilities is not None: auc_score = roc_auc_score(target, probabilities) - return (accuracy, auc_score) - - -def check_accuracy(test_file, label_column, predictions, threshold, sep=','): - (test, label) = get_X_y(test_file, label_column, sep=sep) - accuracy = np.mean(label[label_column].values == - predictions.ix[:, 'PredictedLabel'].values) - assert_greater( - accuracy, - threshold, - "accuracy should be greater than %s" % - threshold) - - -def check_accuracy_scikit( - test_file, - label_column, - predictions, - threshold, - sep=','): - (test, label) = get_X_y(test_file, label_column, sep=sep) - accuracy = np.mean(label[label_column].values == predictions.values) - assert_greater( - accuracy, - threshold, - "accuracy should be greater than %s" % - threshold) + return (accuracy, auc_score) \ No newline at end of file diff --git a/src/python/setup.py b/src/python/setup.py index 251adae1..fc350275 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -45,7 +45,7 @@ # Versions should comply with PEP440. 
For a discussion on # single-sourcing the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='1.3.1', + version='1.5.0', description='NimbusML', long_description=long_description, @@ -115,7 +115,7 @@ 'nose>=1.3', 'pytest>=4.4.0', 'graphviz', 'imageio', ], - 'dprep': ['azureml-dataprep'], + 'dprep': ['azureml-dataprep>=1.1.12'], 'utils': ['graphviz', 'imageio'], }, diff --git a/src/python/setup.py.in b/src/python/setup.py.in index 3ddce586..e65db7d8 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -115,7 +115,7 @@ setup( 'nose>=1.3', 'pytest>=4.4.0', 'graphviz', 'imageio', ], - 'dprep': ['azureml-dataprep'], + 'dprep': ['azureml-dataprep>=1.1.12'], 'utils': ['graphviz', 'imageio'], }, diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py index 9cbc09d0..7879896c 100644 --- a/src/python/tests/test_estimator_checks.py +++ b/src/python/tests/test_estimator_checks.py @@ -8,6 +8,7 @@ import json import os +from nimbusml.decomposition import FactorizationMachineBinaryClassifier from nimbusml.ensemble import EnsembleClassifier from nimbusml.ensemble import EnsembleRegressor from nimbusml.ensemble import LightGbmBinaryClassifier @@ -16,6 +17,7 @@ from nimbusml.ensemble import LightGbmRegressor from nimbusml.feature_extraction.text import NGramFeaturizer from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram +from nimbusml.linear_model import SgdBinaryClassifier from nimbusml.preprocessing import TensorFlowScorer from nimbusml.preprocessing.filter import SkipFilter, TakeFilter from nimbusml.timeseries import (IidSpikeDetector, IidChangePointDetector, @@ -70,7 +72,9 @@ # bug, low tolerance 'FastLinearRegressor': 'check_supervised_y_2d, ' 'check_regressor_data_not_an_array, ' - 'check_regressors_int', + 'check_regressors_int, ' + # todo: investigate + 'check_regressors_train', # bug decision function shape should be 1 # dimensional arrays, tolerance 'FastLinearClassifier': 'check_classifiers_train', @@ -93,6 +97,8 @@ 'check_estimators_dtypes', # tolerance 'LogisticRegressionClassifier': 'check_classifiers_train', + # todo: investigate + 'OnlineGradientDescentRegressor': 'check_regressors_train', # bug decision function shape, prediction bug 'NaiveBayesClassifier': 'check_classifiers_train, check_classifiers_classes', @@ -156,8 +162,7 @@ 'PixelExtractor, Loader, Resizer, \ GlobalContrastRowScaler, PcaTransformer, ' 'ColumnConcatenator, Sentiment, CharTokenizer, LightLda, ' - 'NGramFeaturizer, \ - WordEmbedding', + 'NGramFeaturizer, WordEmbedding, LpScaler, WordTokenizer', 'check_transformer_data_not_an_array, check_pipeline_consistency, ' 'check_fit2d_1feature, check_estimators_fit_returns_self,\ check_fit2d_1sample, ' @@ -189,6 +194,7 @@ INSTANCES = { 'EnsembleClassifier': EnsembleClassifier(num_models=3), 'EnsembleRegressor': EnsembleRegressor(num_models=3), + 'FactorizationMachineBinaryClassifier': FactorizationMachineBinaryClassifier(shuffle=False), 'LightGbmBinaryClassifier': LightGbmBinaryClassifier( minimum_example_count_per_group=1, minimum_example_count_per_leaf=1), 'LightGbmClassifier': LightGbmClassifier( @@ -198,6 +204,7 @@ 'LightGbmRanker': LightGbmRanker( minimum_example_count_per_group=1, minimum_example_count_per_leaf=1), 'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()), + 'SgdBinaryClassifier': SgdBinaryClassifier(number_of_threads=1, shuffle=False), 'SkipFilter': SkipFilter(count=5), 'TakeFilter': 
diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json
index acff52df..0a66d5ff 100644
--- a/src/python/tools/manifest_diff.json
+++ b/src/python/tools/manifest_diff.json
@@ -241,7 +241,14 @@
     "Module": "linear_model",
     "Type": "Classifier",
     "Predict_Proba" : true,
-    "Decision_Function" : true
+    "Decision_Function" : true,
+    "Inputs": [
+      {
+        "Name": "Lambda",
+        "NewName": "l2_regularization",
+        "Desc": "L2 regularization weight. It also controls the learning rate, with the learning rate being inversely proportional to it."
+      }
+    ]
   },
   {
     "Name": "Trainers.EnsembleClassification",
@@ -317,6 +324,12 @@
       }
     ]
   },
+  {
+    "Name": "Models.DatasetTransformer",
+    "NewName": "DatasetTransformer",
+    "Module": "preprocessing",
+    "Type": "Transform"
+  },
   {
     "Name": "Trainers.FieldAwareFactorizationMachineBinaryClassifier",
     "NewName": "FactorizationMachineBinaryClassifier",
@@ -474,6 +487,12 @@
     "Module": "preprocessing.normalization",
     "Type": "Transform"
   },
+  {
+    "Name": "Transforms.LpNormalizer",
+    "NewName": "LpScaler",
+    "Module": "preprocessing.normalization",
+    "Type": "Transform"
+  },
   {
     "Name": "Transforms.MissingValuesRowDropper",
     "NewName": "Filter",
@@ -714,6 +733,12 @@
     "Module": "preprocessing.text",
     "Type": "Transform"
   },
+  {
+    "Name": "Transforms.WordTokenizer",
+    "NewName": "WordTokenizer",
+    "Module": "preprocessing.text",
+    "Type": "Transform"
+  },
   {
     "Name": "Transforms.LightLda",
     "NewName": "LightLda",
diff --git a/version.txt b/version.txt
index 3a3cd8cc..bc80560f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.3.1
+1.5.0