This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Merged

158 commits
fcc1924
Update readme with latest feedback (#39)
GalOshri Oct 31, 2018
e100472
Add THIRD-PARTY-NOTICES.txt and move CONTRIBUTING.md to root. (#40)
montebhoover Oct 31, 2018
8239922
Create CODE_OF_CONDUCT.md
ganik Nov 1, 2018
ad0af7c
Update issue templates
ganik Nov 1, 2018
19f4721
Create PULL_REQUEST_TEMPLATE.md
ganik Nov 1, 2018
1e16e64
Update issue templates
ganik Nov 1, 2018
007e624
Update issue templates
ganik Nov 1, 2018
110b0f9
Update issue templates
ganik Nov 1, 2018
0b5adef
Fixing link in CONTRIBUTING.md (#44)
justinormont Nov 2, 2018
a2ba6f5
Update contributing.md link. (#43)
montehoover Nov 4, 2018
45be3d7
Merge pull request #1 from Microsoft/master
ganik Nov 9, 2018
243325d
Initial checkin for ML.NET 0.7 upgrade
ganik Nov 9, 2018
cbfb439
fix tests
ganik Nov 9, 2018
653d8c1
put back columndropper
ganik Nov 10, 2018
1ae3060
fix tests
ganik Nov 10, 2018
10bd895
Update scikit-learn links to use https instead of http
Nov 19, 2018
bec566c
Merge pull request #56 from GalOshri/update-scikit-learn-https
justinormont Nov 20, 2018
725be2e
Merge pull request #2 from Microsoft/master
ganik Nov 23, 2018
f74b3c8
restart dotnetcore2 package work
ganik Nov 23, 2018
a9684bc
fix build
ganik Nov 23, 2018
0976828
fix mac & linux
ganik Nov 24, 2018
620d13d
fix build
ganik Nov 24, 2018
3e10cec
fix build
ganik Nov 24, 2018
04e87b7
dbg build
ganik Nov 24, 2018
98c8987
fix build
ganik Nov 24, 2018
d2e815f
fix build
ganik Nov 24, 2018
34c5f29
handle py 2.7
ganik Nov 24, 2018
bbb4c63
handle py27
ganik Nov 24, 2018
64da211
fix py27
ganik Nov 24, 2018
7ea0a25
fix build
ganik Nov 25, 2018
55308ec
fix build
ganik Nov 25, 2018
577d84e
fix build
ganik Nov 25, 2018
b571d22
ensure dependencies
ganik Nov 25, 2018
062d55a
ignore exceptions from ensure dependencies
ganik Nov 25, 2018
ace3cc9
Merge pull request #60 from ganik/ganik/dotnet2
ganik Nov 26, 2018
ab3d80d
up version
ganik Nov 26, 2018
36b4f48
Merge pull request #3 from Microsoft/master
ganik Nov 26, 2018
c2ce774
Merge branch 'master' into ganik/dotnet2
ganik Nov 26, 2018
d912ca5
Merge pull request #61 from ganik/ganik/dotnet2
ganik Nov 26, 2018
9fd5c3c
Update cv.py
zyw400 Nov 27, 2018
7c58875
Update cv.py
zyw400 Nov 27, 2018
1d02fc3
add a test for cv with data frame
zyw400 Nov 27, 2018
422bd8d
set DOTNET_SYSTEM_GLOBALIZATION_INVARIANT to true to fix app domain e…
ganik Nov 28, 2018
c6704fd
Merge pull request #62 from zyw400/yiwzh/fix_cv
ganik Nov 28, 2018
9d3376c
fix build
ganik Nov 28, 2018
83db226
Merge pull request #4 from Microsoft/master
ganik Nov 28, 2018
e54535d
Merge branch 'master' into ganik/amldocker
ganik Nov 28, 2018
4c68428
up version
ganik Nov 28, 2018
52ad2d0
Merge pull request #63 from ganik/ganik/amldocker
ganik Nov 28, 2018
341e01a
Add instructions for editing docstrings. (#51)
montebhoover Dec 6, 2018
9a0b50e
Fix build failures caused by dotnetcore2 module. (#67)
montebhoover Dec 7, 2018
0d2e4e6
Reduce number of build legs for PR validations and add nightly build …
montebhoover Dec 7, 2018
f7b7ded
Merge pull request #5 from Microsoft/master
ganik Dec 12, 2018
19b240e
Merge branch 'master' into ganik/mlnet.7
ganik Dec 12, 2018
b45a953
Increase version to 0.6.5. (#71)
montebhoover Dec 12, 2018
f3eb0bb
Update clr helper function to search multiple folders for clr binarie…
montebhoover Dec 15, 2018
155696c
fix drop column param name
ganik Dec 16, 2018
c94568d
Merge pull request #6 from Microsoft/master
ganik Dec 16, 2018
453a940
Merge branch 'master' into ganik/mlnet.7
ganik Dec 16, 2018
f95b3ba
Remove restricted permissions on build.sh script.
Dec 18, 2018
7a5e6d9
Fix lightgbm test failures by updating runtime dependencies.
montebhoover Dec 18, 2018
7a46ce1
fix TensorFlowScorer model_location paramter name
ganik Dec 18, 2018
7b7692c
Fix build.sh defaults so that it detects when running on a mac.
Dec 18, 2018
492751f
Since OneHotHashVectorizer is broken for output kind Key in ML.NET 0.…
ganik Dec 20, 2018
66cb189
Merge pull request #75 from montebhoover/improve_mac_ux
justinormont Dec 20, 2018
eb2b39f
fix tests
ganik Dec 20, 2018
d71a31e
Merge pull request #7 from Microsoft/master
ganik Dec 20, 2018
27d4a6a
Merge branch 'master' into ganik/mlnet.7
ganik Dec 20, 2018
af76d08
Merge pull request #8 from ganik/master
ganik Dec 20, 2018
c779510
fix pyproj test
ganik Dec 20, 2018
2bdfa41
Merge branch 'ganik/mlnet.7' of https://github.com/ganik/NimbusML int…
ganik Dec 20, 2018
d23d696
fix win 3.6 build
ganik Dec 20, 2018
172c1e8
fix comments
ganik Dec 20, 2018
80ce48f
Merge pull request #55 from ganik/ganik/mlnet.7
ganik Dec 20, 2018
b5f1c2e
Merge pull request #1 from Microsoft/master
zyw400 Jan 4, 2019
bfaf819
expose "parallel" to the fit/fit_transform function by including **pa…
zyw400 Jan 5, 2019
eaeb24c
add a test for the parallel
zyw400 Jan 5, 2019
a5997db
update parallel thread
zyw400 Jan 7, 2019
67530ff
fix tests comparison
zyw400 Jan 7, 2019
066469f
Update thread, retry build
zyw400 Jan 7, 2019
a9596ca
modify tests
zyw400 Jan 7, 2019
13d7b35
specify pytest-cov version
zyw400 Jan 7, 2019
af577c4
update pytest-cov version in build command for linux
zyw400 Jan 7, 2019
4dc79e1
for windows use the latest pytest-cov
zyw400 Jan 7, 2019
d2535be
Merge pull request #86 from zyw400/yiwzh/add_nthreads_to_graph
zyw400 Jan 8, 2019
3079d56
Enabled strong naming for DoNetBridge.dll (to be used for InternalsVi…
Jan 8, 2019
a556f39
Changed the keys to be the same as other internal repos
Jan 8, 2019
0fd4f0e
Changed the key filename
Jan 8, 2019
4f7f22b
Merge branch 'master' into strongname
Jan 9, 2019
b0c1e3a
Merge pull request #87 from shmoradims/strongname
Jan 9, 2019
9e57f19
Update to ML.NET 0.10.preview (#77)
montebhoover Jan 16, 2019
7c9a1c6
Simplify by using six.string_types (#89)
cclauss Jan 18, 2019
e5f2b65
Removed ISchema from DotNetBridge (#90)
Jan 24, 2019
dca1157
add configuration for python 3.7 (#101)
xadupre Apr 11, 2019
3616e73
Removing 3.7 for now as its not in PyPI
ganik May 7, 2019
210b220
Upgrade to ML.NET version 1.0.0 (#100)
ganik May 27, 2019
b5eb937
Fix latest Windows build issues. (#105)
pieths May 27, 2019
c35536d
Fixes #50 - summary() fails if called a second time. (#107)
pieths May 30, 2019
8da35e1
Fixes #99. Do not use hardcoded file separator. (#108)
pieths May 30, 2019
b4ec723
Delete the cached summaries when refitting a pipeline or a predictor.…
pieths Jun 1, 2019
91478d1
Fix signature import error when using latest version of scikit-learn.…
pieths Jun 2, 2019
a580331
Package System.Drawing.Common.dll as its missing in dotnetcore2 (#120)
ganik Jun 4, 2019
7848487
Upgrade the pytest-remotedata package to fix missing attribute error.…
pieths Jun 4, 2019
32e2d67
Upgrade version (#122)
ganik Jun 4, 2019
d09a5c5
Support quoted strings by default (#124)
ganik Jun 4, 2019
b57cfcc
upgrade to ML.NET 1.1 (#126)
ganik Jun 5, 2019
b4931e4
Put long running tests in to their own folder to shorten build times.…
pieths Jun 13, 2019
7863ca0
Expose ML.NET SSA & IID spike & changepoint detectors. (#135)
pieths Jun 14, 2019
3c689c6
Fix a few minor issues with time series unit tests and examples. (#139)
pieths Jun 18, 2019
207a6b6
Skip Image.py and Image_df.py tests for Ubuntu 14 (#149)
Stephen0620 Jun 18, 2019
0ca2b29
* Fixed the script for generating the documentation (#144)
Stephen0620 Jun 18, 2019
3b46629
Rename time_series package to timeseries. (#150)
pieths Jun 18, 2019
19b27f0
Fixed the issue of Ubuntu14 not skipping Image.py and Image_df.py (#161)
Stephen0620 Jun 28, 2019
c5153c2
Updated CharTokenizer.py example (#153)
Stephen0620 Jun 28, 2019
c45edfe
Skip CharTokenizer.py for extended tests (#163)
Stephen0620 Jul 1, 2019
7893bfd
Add support for returning custom values when overriding Pipeline.pred…
pieths Jul 1, 2019
c4b26d9
Initial creation of the release-next.md file. (#165)
pieths Jul 1, 2019
3993365
Initial implementation of the SsaForecaster entry point. (#164)
pieths Jul 2, 2019
29af47a
Final updates for release 1.2.0 (#167)
pieths Jul 3, 2019
4822871
Revert change b5eb9376dd14da606e91f7f94f1bec7b7609a7a1 to see if it (…
pieths Jul 3, 2019
a2c3e1f
Bring back build.cmd commit. It did not fix the signed build issue. (…
pieths Jul 3, 2019
8bb0c0c
Bring back the build.cmd change from b5eb9376dd14da606e91f7f94f1bec7b…
pieths Jul 3, 2019
4dddfda
Use restored dotnet CLI for signing (#171)
safern Jul 3, 2019
8da13e7
Update README.md
ganik Jul 4, 2019
08d8abf
Enable LinearSvmBinaryClassifier (#180)
najeeb-kazmi Jul 11, 2019
ab27816
Setup destructors for data passed to python (#184)
ganik Jul 12, 2019
c2f2b6b
Add azureml-dataprep support for dataflow objects (#181)
ganik Jul 12, 2019
4395c12
up version (#188)
ganik Jul 13, 2019
c0500d1
Save the model file when pickling a NimbusML Pipeline. (#189)
pieths Jul 18, 2019
266d27d
Remove stored references to X and y in BasePredictor. (#195)
pieths Jul 18, 2019
426fffe
Add observation level feature contributions to Pipeline and BasePredi…
najeeb-kazmi Jul 19, 2019
417bb35
Update release-next.md
najeeb-kazmi Jul 19, 2019
a36a6c0
Add classes_ to Pipeline and/or predictor when calling predict_proba.…
pieths Jul 25, 2019
5306833
Update Handler, Filter, and Indicator to automatically convert the in…
pieths Jul 31, 2019
1f97c9e
Combine models from transforms, predictors and pipelines in to one mo…
pieths Aug 4, 2019
47f8984
Fix build (#209)
ganik Aug 4, 2019
bea821e
Update release-next.md. (#211)
pieths Aug 5, 2019
68f9be1
Update release-next.md
ganik Aug 5, 2019
c4ebe0f
Update release-next.md
najeeb-kazmi Aug 5, 2019
51bdff2
Update release-next.md
najeeb-kazmi Aug 5, 2019
c655aad
Add classifier and FileDataStream unit tests to test_pipeline_combini…
pieths Aug 5, 2019
9dd9c11
Update release-next.md
najeeb-kazmi Aug 6, 2019
0458160
up version (#210)
ganik Aug 6, 2019
e257cf3
Enable EnsembleClassifier and EnsembleRegressor (#207)
najeeb-kazmi Aug 6, 2019
ecf456b
Create release notes for version 1.3.0. (#214)
pieths Aug 6, 2019
a3051aa
Update release-1.3.0.md
najeeb-kazmi Aug 6, 2019
ee136ff
Add --installPythonPackages flag to build scripts (#215)
najeeb-kazmi Aug 8, 2019
13844cc
Fix a bug with the classes_ attribute when no y input is specified du…
pieths Aug 8, 2019
8fa5878
Add NumSharp.Core.dll (#220)
ganik Aug 8, 2019
6f7cb41
Add timeseries documentation to the master branch. (#221)
pieths Aug 8, 2019
e348250
Docs update (#224)
najeeb-kazmi Aug 13, 2019
9c7c096
More doc fixes (#228)
najeeb-kazmi Aug 15, 2019
2baa87e
Pass python path to Dprep (#232)
ganik Aug 19, 2019
b08ea7b
Merge branch 'master' into temp/docs
Aug 19, 2019
9e38c5f
Remove all underscore files which are not getting recognized as renamed.
Aug 19, 2019
7d0acbf
Rename the files to start with an underscore character.
Aug 19, 2019
02623ce
Update the previously renamed files with underscores where required.
Aug 19, 2019
aca2d7a
Finish merge of nimbusml.pyproj from master.
Aug 19, 2019
2 changes: 1 addition & 1 deletion build.cmd
@@ -59,7 +59,7 @@ if /i [%1] == [--azureBuild] (
) else goto :Usage

:Usage
echo "Usage: build.cmd [--configuration <Configuration>] [--runTests] [--includeExtendedTests] [--buildDotNetBridgeOnly] [--skipDotNetBridge] [--azureBuild]"
echo "Usage: build.cmd [--configuration <Configuration>] [--runTests] [--installPythonPackages] [--includeExtendedTests] [--buildDotNetBridgeOnly] [--skipDotNetBridge] [--azureBuild]"
echo ""
echo "Options:"
echo " --configuration <Configuration> Build Configuration (DbgWinPy3.7,DbgWinPy3.6,DbgWinPy3.5,DbgWinPy2.7,RlsWinPy3.7,RlsWinPy3.6,RlsWinPy3.5,RlsWinPy2.7)"
2 changes: 1 addition & 1 deletion build.sh
@@ -11,7 +11,7 @@ mkdir -p "${DependenciesDir}"

usage()
{
echo "Usage: $0 --configuration <Configuration> [--runTests] [--includeExtendedTests]"
echo "Usage: $0 --configuration <Configuration> [--runTests] [--includeExtendedTests] [--installPythonPackages]"
echo ""
echo "Options:"
echo " --configuration <Configuration> Build Configuration (DbgLinPy3.7,DbgLinPy3.6,DbgLinPy3.5,DbgLinPy2.7,RlsLinPy3.7,RlsLinPy3.6,RlsLinPy3.5,RlsLinPy2.7,DbgMacPy3.7,DbgMacPy3.6,DbgMacPy3.5,DbgMacPy2.7,RlsMacPy3.7,RlsMacPy3.6,RlsMacPy3.5,RlsMacPy2.7)"
4 changes: 4 additions & 0 deletions src/DotNetBridge/Bridge.cs
@@ -242,6 +242,10 @@ private struct EnvironmentBlock
// Call back to provide cancel flag.
[FieldOffset(0x28)]
public readonly void* checkCancel;

+// Path to python executable.
+[FieldOffset(0x30)]
+public readonly sbyte* pythonPath;
#pragma warning restore 649 // never assigned
}

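The new field sits at offset 0x30, immediately after checkCancel at 0x28, and the C++ EnvironmentBlock below gains pythonPath in the matching position: both runtimes view the same block of memory, so the layouts must agree byte for byte. A small ctypes illustration of that fixed-layout idea (a sketch only, not the actual NimbusML interop; the reduced struct omits the other callback fields):

```python
# Sketch: why the C# [FieldOffset] values must track the C++ field
# order. Both sides read one shared memory block, so each field lives
# at an agreed byte offset. Offsets printed here are for this reduced
# struct, not the real EnvironmentBlock (where checkCancel is at 0x28
# and pythonPath at 0x30).
import ctypes

class EnvBlockSketch(ctypes.Structure):
    _fields_ = [
        ("checkCancel", ctypes.c_void_p),  # callback pointer
        ("pythonPath", ctypes.c_char_p),   # new field, appended after it
    ]

blk = EnvBlockSketch(pythonPath=b"/usr/bin/python3")
print(EnvBlockSketch.pythonPath.offset)  # 8 on 64-bit: right after the pointer
```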
2 changes: 1 addition & 1 deletion src/DotNetBridge/DotNetBridge.csproj
@@ -42,7 +42,7 @@
<PackageReference Include="Microsoft.ML.Dnn" Version="0.15.1" />
<PackageReference Include="Microsoft.ML.Ensemble" Version="0.15.1" />
<PackageReference Include="Microsoft.ML.TimeSeries" Version="1.3.1" />
<PackageReference Include="Microsoft.DataPrep" Version="0.0.1.5-preview" />
<PackageReference Include="Microsoft.DataPrep" Version="0.0.1.12-preview" />
<PackageReference Include="TensorFlow.NET" Version="0.10.10" />
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="1.14.0" />
</ItemGroup>
5 changes: 4 additions & 1 deletion src/DotNetBridge/RunGraph.cs
@@ -147,8 +147,11 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s
var extension = Path.GetExtension(path);
if (extension == ".txt")
dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path));
-else if(extension == ".dprep")
+else if (extension == ".dprep")
+{
+DPrepSettings.Instance.PythonPath = BytesToString(penv->pythonPath);
dv = DataFlow.FromDPrepFile(path).ToDataView();
+}
else
dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path);
}
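For readers who don't follow C#, the branch above is a three-way dispatch on the input file's extension, and the new .dprep branch first records which Python interpreter DataPrep should use. A Python paraphrase of the control flow (the loader stubs are illustrative, not NimbusML API):

```python
# Paraphrase of the extension dispatch in RunGraphCore above. The stub
# loaders stand in for TextLoader, DataFlow.FromDPrepFile and
# BinaryLoader; only the control flow mirrors the C# code.
import os

def load_text(path):   return ("text", path)
def load_dprep(path):  return ("dprep", path)
def load_binary(path): return ("binary", path)

dprep_python_path = None  # stands in for DPrepSettings.Instance.PythonPath

def load_input(path, python_path):
    global dprep_python_path
    ext = os.path.splitext(path)[1]
    if ext == ".txt":
        return load_text(path)
    elif ext == ".dprep":
        # New in this change: DataPrep must know which Python to run
        # before the .dprep package is materialized into a data view.
        dprep_python_path = python_path
        return load_dprep(path)
    else:
        return load_binary(path)

print(load_input("train.dprep", "/usr/bin/python3"))  # ('dprep', 'train.dprep')
```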
3 changes: 2 additions & 1 deletion src/NativeBridge/ManagedInterop.cpp
@@ -77,7 +77,7 @@ EnvironmentBlock::~EnvironmentBlock()
FillDead(_vset[i]);
}

-EnvironmentBlock::EnvironmentBlock(int verbosity, int maxThreadsAllowed, int seed)
+EnvironmentBlock::EnvironmentBlock(int verbosity, int maxThreadsAllowed, int seed, const char* pythonPath)
{
// Assert that this class doesn't have a vtable.
assert(offsetof(EnvironmentBlock, verbosity) == 0);
@@ -86,6 +86,7 @@ EnvironmentBlock::EnvironmentBlock(int verbosity, int maxThreadsAllowed, int see
this->verbosity = verbosity;
this->maxThreadsAllowed = maxThreadsAllowed;
this->seed = seed;
+this->pythonPath = pythonPath;
this->_kindMask = (1 << Warning) | (1 << Error);
if (verbosity > 0)
this->_kindMask |= (1 << Info);
5 changes: 4 additions & 1 deletion src/NativeBridge/ManagedInterop.h
@@ -81,8 +81,11 @@ class CLASS_ALIGN EnvironmentBlock
// Check cancellation flag.
CHECKCANCEL checkCancel;

+// Path to python executable
+const char* pythonPath;

public:
-EnvironmentBlock(int verbosity = 0, int maxThreadsAllowed = 0, int seed = 42);
+EnvironmentBlock(int verbosity = 0, int maxThreadsAllowed = 0, int seed = 42, const char* pythonPath = NULL);
~EnvironmentBlock();
PyErrorCode GetErrorCode() { return _errCode; }
std::string GetErrorMessage() { return _errMessage; }
11 changes: 7 additions & 4 deletions src/NativeBridge/dllmain.cpp
@@ -12,6 +12,7 @@
#define PARAM_MLNET_PATH "mlnetPath"
#define PARAM_DOTNETCLR_PATH "dotnetClrPath"
#define PARAM_DPREP_PATH "dprepPath"
+#define PARAM_PYTHON_PATH "pythonPath"
#define PARAM_DATA "data"


@@ -74,13 +75,15 @@ bp::dict pxCall(bp::dict& params)
bp::extract<std::string> mlnetPath(params[PARAM_MLNET_PATH]);
bp::extract<std::string> dotnetClrPath(params[PARAM_DOTNETCLR_PATH]);
bp::extract<std::string> dprepPath(params[PARAM_DPREP_PATH]);
-bp::extract<std::int32_t> verbose(params[PARAM_VERBOSE]);
+bp::extract<std::string> pythonPath(params[PARAM_PYTHON_PATH]);
+bp::extract<std::int32_t> verbose(params[PARAM_VERBOSE]);
std::int32_t i_verbose = std::int32_t(verbose);
std::string s_mlnetPath = std::string(mlnetPath);
std::string s_dotnetClrPath = std::string(dotnetClrPath);
std::string s_dprepPath = std::string(dprepPath);
-std::string s_graph = std::string(graph);
-const char *mlnetpath = s_mlnetPath.c_str();
+std::string s_pythonPath = std::string(pythonPath);
+std::string s_graph = std::string(graph);
+const char *mlnetpath = s_mlnetPath.c_str();
const char *coreclrpath = s_dotnetClrPath.c_str();
const char *dpreppath = s_dprepPath.c_str();

@@ -93,7 +96,7 @@
if (params.has_key(PARAM_SEED))
seed = bp::extract<int>(params[PARAM_SEED]);

-EnvironmentBlock env(i_verbose, 0, seed);
+EnvironmentBlock env(i_verbose, 0, seed, s_pythonPath.c_str());
int retCode;
if (params.has_key(PARAM_DATA) && bp::extract<bp::dict>(params[PARAM_DATA]).check())
{
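Seen from the Python side, pxCall consumes a flat dict keyed by the PARAM_* names above, and this change adds one more entry. A hedged sketch of such a dict (key names come from the #define list; every path value is a placeholder, and how NimbusML actually assembles this dict internally is not shown here):

```python
# Sketch of the argument dict the native bridge unpacks in pxCall.
# Keys mirror the PARAM_* defines; values are placeholders except
# sys.executable, a natural choice for the new "pythonPath" entry.
import sys

params = {
    "graph": '{"nodes": []}',            # serialized entry-point graph
    "mlnetPath": "/path/to/mlnet/libs",  # placeholder
    "dotnetClrPath": "/path/to/clr",     # placeholder
    "dprepPath": "/path/to/dprep",       # placeholder
    "pythonPath": sys.executable,        # new in this change
    "verbose": 0,
    "seed": 42,                          # optional; defaults to 42 downstream
}
```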
2 changes: 1 addition & 1 deletion src/Platforms/build.csproj
@@ -22,7 +22,7 @@
<PackageReference Include="Microsoft.ML.Dnn" Version="0.15.1" />
<PackageReference Include="Microsoft.ML.Ensemble" Version="0.15.1" />
<PackageReference Include="Microsoft.ML.TimeSeries" Version="1.3.1" />
<PackageReference Include="Microsoft.DataPrep" Version="0.0.1.5-preview" />
<PackageReference Include="Microsoft.DataPrep" Version="0.0.1.12-preview" />
<PackageReference Include="TensorFlow.NET" Version="0.10.10" />
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="1.14.0" />
</ItemGroup>
16 changes: 8 additions & 8 deletions src/python/docs/docstrings/EnsembleClassifier.txt
@@ -30,14 +30,14 @@
* ``RandomFeatureSelector``: selects a random subset of the features
for each model.

-:param num_models: indicates the number models to train, i.e. the number of
+:param num_models: Indicates the number models to train, i.e. the number of
subsets of the training set to sample. The default value is 50. If
batches are used then this indicates the number of models per batch.

:param sub_model_selector_type: Determines the efficient set of models the
-``output_combiner`` uses, and removes the least significant models. This is
-used to improve the accuracy and reduce the model size. This is also called
-pruning.
+``output_combiner`` uses, and removes the least significant models.
+This is used to improve the accuracy and reduce the model size. This is
+also called pruning.

* ``ClassifierAllSelector``: does not perform any pruning and selects
all models in the ensemble to combine to create the output. This is
Expand All @@ -51,9 +51,9 @@
or ``"LogLossReduction"``.


-:param output_combiner: indicates how to combine the predictions of the different
-models into a single prediction. There are five available output
-combiners for clasification:
+:param output_combiner: Indicates how to combine the predictions of the
+different models into a single prediction. There are five available
+outputcombiners for clasification:

* ``ClassifierAverage``: computes the average of the scores produced by
the trained models.
@@ -92,7 +92,7 @@
and ``0 <= b <= 1`` and ``b - a = 1``. This normalizer preserves
sparsity by mapping zero to zero.

-:param batch_size: train the models iteratively on subsets of the training
+:param batch_size: Train the models iteratively on subsets of the training
set of this size. When using this option, it is assumed that the
training set is randomized enough so that every batch is a random
sample of instances. The default value is -1, indicating using the
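A short usage sketch tying the parameters documented above together (the import path and the string form of the combiner are assumptions based on this docstring, and the data is synthetic):

```python
# Sketch only: exercises num_models, output_combiner and batch_size as
# documented above. The import path and 'ClassifierAverage' string are
# assumed from the docstring, not verified against the package.
import numpy as np
from nimbusml.ensemble import EnsembleClassifier

X = np.random.rand(200, 5).astype(np.float32)
y = (X[:, 0] > 0.5).astype(np.int32)

clf = EnsembleClassifier(
    num_models=10,                        # sample 10 training subsets
    output_combiner="ClassifierAverage",  # average the sub-model scores
    batch_size=-1,                        # -1: single batch, whole training set
)
clf.fit(X, y)
print(clf.predict(X[:5]))
```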
16 changes: 8 additions & 8 deletions src/python/docs/docstrings/EnsembleRegressor.txt
@@ -30,14 +30,14 @@
* ``RandomFeatureSelector``: selects a random subset of the features
for each model.

-:param num_models: indicates the number models to train, i.e. the number of
+:param num_models: Indicates the number models to train, i.e. the number of
subsets of the training set to sample. The default value is 50. If
batches are used then this indicates the number of models per batch.

:param sub_model_selector_type: Determines the efficient set of models the
-``output_combiner`` uses, and removes the least significant models. This is
-used to improve the accuracy and reduce the model size. This is also called
-pruning.
+``output_combiner`` uses, and removes the least significant models.
+This is used to improve the accuracy and reduce the model size. This is
+also called pruning.

* ``RegressorAllSelector``: does not perform any pruning and selects
all models in the ensemble to combine to create the output. This is
Expand All @@ -51,9 +51,9 @@
``"RSquared"``.


-:param output_combiner: indicates how to combine the predictions of the different
-models into a single prediction. There are five available output
-combiners for clasification:
+:param output_combiner: Indicates how to combine the predictions of the
+different models into a single prediction. There are five available
+output combiners for clasification:

* ``RegressorAverage``: computes the average of the scores produced by
the trained models.
@@ -86,7 +86,7 @@
and ``0 <= b <= 1`` and ``b - a = 1``. This normalizer preserves
sparsity by mapping zero to zero.

-:param batch_size: train the models iteratively on subsets of the training
+:param batch_size: Train the models iteratively on subsets of the training
set of this size. When using this option, it is assumed that the
training set is randomized enough so that every batch is a random
sample of instances. The default value is -1, indicating using the
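The regressor mirrors the classifier; a companion sketch with the same caveats (names assumed from the docstring, synthetic data):

```python
# Sketch only: same shape as the classifier example above, using the
# regressor's documented 'RegressorAverage' combiner.
import numpy as np
from nimbusml.ensemble import EnsembleRegressor

X = np.random.rand(200, 5).astype(np.float32)
y = (2.0 * X[:, 0] - X[:, 1]).astype(np.float32)

reg = EnsembleRegressor(num_models=10, output_combiner="RegressorAverage")
reg.fit(X, y)
print(reg.predict(X[:5]))
```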
10 changes: 4 additions & 6 deletions src/python/docs/docstrings/LinearSvmBinaryClassifier.txt
@@ -5,12 +5,10 @@
.. remarks::
Linear SVM implements an algorithm that finds a hyperplane in the
feature space for binary classification, by solving an SVM problem.
-For instance, with feature values $f_0, f_1,..., f_{D-1}$, the
-prediction is given by determining what side of the hyperplane the
-point falls into. That is the same as the sign of the feautures'
-weighted sum, i.e. $\sum_{i = 0}^{D-1} \left(w_i * f_i \right) + b$,
-where $w_0, w_1,..., w_{D-1}$ are the weights computed by the
-algorithm, and *b* is the bias computed by the algorithm.
+For instance, for a given feature vector, the prediction is given by
+determining what side of the hyperplane the point falls into. That is
+the same as the sign of the feautures' weighted sum (the weights being
+computed by the algorithm) plus the bias computed by the algorithm.

This algorithm implemented is the PEGASOS method, which alternates
between stochastic gradient descent steps and projection steps,
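The rewritten prose drops the explicit formula, so for reference: with weights $w_0,\dots,w_{D-1}$, features $f_0,\dots,f_{D-1}$ and bias $b$, the prediction is the sign of $\sum_{i=0}^{D-1} w_i f_i + b$. A tiny self-contained sketch (weights and bias are made-up numbers, not learned values):

```python
# The decision rule described above: classify by the sign of the
# features' weighted sum plus the bias.
import numpy as np

w = np.array([0.8, -0.3, 1.2])  # weights the algorithm would learn
b = -0.5                        # bias the algorithm would learn
f = np.array([1.0, 2.0, 0.5])   # one feature vector

score = np.dot(w, f) + b        # sum_i w_i * f_i + b
print(score, 1 if score > 0 else -1)
```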
2 changes: 1 addition & 1 deletion src/python/nimbusml/__init__.py
@@ -2,7 +2,7 @@
Microsoft Machine Learning for Python
"""

-__version__ = '1.3.0'
+__version__ = '1.3.1'

# CoreCLR version of MicrosoftML is built on Windows.
# But file permissions are not preserved when it's copied to Linux.
2 changes: 0 additions & 2 deletions src/python/nimbusml/ensemble/__init__.py
@@ -27,5 +27,3 @@
'LightGbmRanker',
'LightGbmRegressor'
]
-
-
16 changes: 8 additions & 8 deletions src/python/nimbusml/ensemble/_ensembleclassifier.py
@@ -57,14 +57,14 @@ class EnsembleClassifier(core, BasePredictor, ClassifierMixin):
* ``RandomFeatureSelector``: selects a random subset of the features
for each model.

-:param num_models: indicates the number models to train, i.e. the number of
+:param num_models: Indicates the number models to train, i.e. the number of
subsets of the training set to sample. The default value is 50. If
batches are used then this indicates the number of models per batch.

:param sub_model_selector_type: Determines the efficient set of models the
-``output_combiner`` uses, and removes the least significant models. This is
-used to improve the accuracy and reduce the model size. This is also called
-pruning.
+``output_combiner`` uses, and removes the least significant models.
+This is used to improve the accuracy and reduce the model size. This is
+also called pruning.

* ``ClassifierAllSelector``: does not perform any pruning and selects
all models in the ensemble to combine to create the output. This is
Expand All @@ -77,9 +77,9 @@ class EnsembleClassifier(core, BasePredictor, ClassifierMixin):
``"AccuracyMicro"``, ``"AccuracyMacro"``, ``"LogLoss"``,
or ``"LogLossReduction"``.

-:param output_combiner: indicates how to combine the predictions of the different
-models into a single prediction. There are five available output
-combiners for clasification:
+:param output_combiner: Indicates how to combine the predictions of the
+different models into a single prediction. There are five available
+outputcombiners for clasification:

* ``ClassifierAverage``: computes the average of the scores produced by
the trained models.
@@ -123,7 +123,7 @@ class EnsembleClassifier(core, BasePredictor, ClassifierMixin):
:param train_parallel: All the base learners will run asynchronously if the
value is true.

-:param batch_size: train the models iteratively on subsets of the training
+:param batch_size: Train the models iteratively on subsets of the training
set of this size. When using this option, it is assumed that the
training set is randomized enough so that every batch is a random
sample of instances. The default value is -1, indicating using the
16 changes: 8 additions & 8 deletions src/python/nimbusml/ensemble/_ensembleregressor.py
@@ -57,14 +57,14 @@ class EnsembleRegressor(core, BasePredictor, RegressorMixin):
* ``RandomFeatureSelector``: selects a random subset of the features
for each model.

-:param num_models: indicates the number models to train, i.e. the number of
+:param num_models: Indicates the number models to train, i.e. the number of
subsets of the training set to sample. The default value is 50. If
batches are used then this indicates the number of models per batch.

:param sub_model_selector_type: Determines the efficient set of models the
-``output_combiner`` uses, and removes the least significant models. This is
-used to improve the accuracy and reduce the model size. This is also called
-pruning.
+``output_combiner`` uses, and removes the least significant models.
+This is used to improve the accuracy and reduce the model size. This is
+also called pruning.

* ``RegressorAllSelector``: does not perform any pruning and selects
all models in the ensemble to combine to create the output. This is
Expand All @@ -77,9 +77,9 @@ class EnsembleRegressor(core, BasePredictor, RegressorMixin):
can be ``"L1"``, ``"L2"``, ``"Rms"``, or ``"Loss"``, or
``"RSquared"``.

-:param output_combiner: indicates how to combine the predictions of the different
-models into a single prediction. There are five available output
-combiners for clasification:
+:param output_combiner: Indicates how to combine the predictions of the
+different models into a single prediction. There are five available
+output combiners for clasification:

* ``RegressorAverage``: computes the average of the scores produced by
the trained models.
@@ -117,7 +117,7 @@ class EnsembleRegressor(core, BasePredictor, RegressorMixin):
:param train_parallel: All the base learners will run asynchronously if the
value is true.

-:param batch_size: train the models iteratively on subsets of the training
+:param batch_size: Train the models iteratively on subsets of the training
set of this size. When using this option, it is assumed that the
training set is randomized enough so that every batch is a random
sample of instances. The default value is -1, indicating using the
@@ -57,14 +57,14 @@ class EnsembleClassifier(
* ``RandomFeatureSelector``: selects a random subset of the features
for each model.

-:param num_models: indicates the number models to train, i.e. the number of
+:param num_models: Indicates the number models to train, i.e. the number of
subsets of the training set to sample. The default value is 50. If
batches are used then this indicates the number of models per batch.

:param sub_model_selector_type: Determines the efficient set of models the
-``output_combiner`` uses, and removes the least significant models. This is
-used to improve the accuracy and reduce the model size. This is also called
-pruning.
+``output_combiner`` uses, and removes the least significant models.
+This is used to improve the accuracy and reduce the model size. This is
+also called pruning.

* ``ClassifierAllSelector``: does not perform any pruning and selects
all models in the ensemble to combine to create the output. This is
Expand All @@ -77,9 +77,9 @@ class EnsembleClassifier(
``"AccuracyMicro"``, ``"AccuracyMacro"``, ``"LogLoss"``,
or ``"LogLossReduction"``.

-:param output_combiner: indicates how to combine the predictions of the different
-models into a single prediction. There are five available output
-combiners for clasification:
+:param output_combiner: Indicates how to combine the predictions of the
+different models into a single prediction. There are five available
+outputcombiners for clasification:

* ``ClassifierAverage``: computes the average of the scores produced by
the trained models.
@@ -123,7 +123,7 @@ class EnsembleClassifier(
:param train_parallel: All the base learners will run asynchronously if the
value is true.

-:param batch_size: train the models iteratively on subsets of the training
+:param batch_size: Train the models iteratively on subsets of the training
set of this size. When using this option, it is assumed that the
training set is randomized enough so that every batch is a random
sample of instances. The default value is -1, indicating using the