Merged

47 commits
8162567
Execute the first two sets of unit tests in parallel.
Oct 15, 2019
1ac5033
Wrap test estimator checks in a python unit test.
Oct 15, 2019
04ea00f
Combine the non-extended test runs together to make them more paralle…
Oct 15, 2019
f25bb32
Reverse the tests path args order to try and have test_estimator_chec…
Oct 15, 2019
eeb4e30
Group the unit tests by filename to make their execution more stable.
Oct 15, 2019
a591122
Add the build script changes to build.sh
Oct 15, 2019
7e2a160
Set the minimum number of concurrent unit tests to 4 for the windows …
Oct 15, 2019
c741c9a
Merge branch 'master' into parallel-unit-tests
Oct 15, 2019
e9557ad
Split test_estimator_checks into two separate tests.
Oct 15, 2019
9e6968c
Hard code num concurrent unit tests to 4 for linux and mac. Disable f…
Oct 15, 2019
69dca80
Merge branch 'master' into parallel-unit-tests
Oct 15, 2019
265e04f
Run the two extended tests in parallel.
Oct 16, 2019
5df9ad3
Support running test_estimator_checks as the main file.
Oct 16, 2019
3f8168c
Merge branch 'master' into parallel-unit-tests
Oct 16, 2019
010856d
Merge branch 'master' into parallel-unit-tests
Oct 17, 2019
51cc916
Dynamically generate the test_estimator_checks unit tests.
Oct 17, 2019
9e4efff
Test intentional failure on build servers.
Oct 17, 2019
4afbf71
Merge branch 'master' into parallel-unit-tests
Oct 21, 2019
c33fcc8
Create the test_docs_example unit tests dynamically so they can be pa…
Oct 21, 2019
0e785bc
Update the number of concurrent extended tests.
Oct 21, 2019
1c7de9d
Merge branch 'master' into parallel-unit-tests
Oct 22, 2019
cd81f83
Remove intentional error from test_csr_matrix_output.
Oct 22, 2019
643b39f
Test intentional error in test_estimator_checks.
Oct 22, 2019
dbec373
Remove the intentional error which was used for testing.
Oct 22, 2019
fda4887
Add whitespace change to restart CI run.
Oct 22, 2019
9138a0d
Add whitespace change to start a new CI run.
Oct 22, 2019
95de280
Load balance by sending test grouped by file to any available environ…
Oct 23, 2019
0495136
Add whitespace change to start a new CI run.
Oct 23, 2019
2f6aeb6
Fix KMeansPlusPlus does not work with a cluster size of 1 when using …
Oct 24, 2019
472ad3f
Fix OLS divide by 0 when given a particular set of inputs to fit. Thi…
Oct 24, 2019
b1ccc3a
Fix issue when ranking where the output of TextToKeyConverter was
Oct 24, 2019
be8835f
Remove a test_estimator_check for OrdinaryLeastSquaresRegressor
Oct 24, 2019
3de74fe
Update test_permutation_feature_importance tests to support parallel …
Oct 24, 2019
75df293
Remove --dist=loadfile from the windows unit test run.
Oct 24, 2019
7122e19
Update test_load_save to support parallel execution.
Oct 24, 2019
dbf6c47
Test turning off pytest assert rewriting.
Oct 24, 2019
366b1b0
Test turning off forcing at least 4 concurrent unit tests
Oct 24, 2019
c345a0f
Whitespace change to start a new CI run.
Oct 24, 2019
e6804ea
Test tests run without coverage.
Oct 24, 2019
4cf17cd
Try and capture the test order during the CI run.
Oct 25, 2019
3988ac4
Remove the pydist replay request.
Oct 25, 2019
ed10974
Rerun unit tests one extra time if any failed to check for intermitte…
Oct 25, 2019
8161d24
Turn back on assert rewriting and coverage reporting. Run extended te…
Oct 28, 2019
9822643
Merge branch 'master' into parallel-unit-tests
Oct 28, 2019
c3c4dbd
Remove whitespace at the end of the comment in build.cmd.
Oct 28, 2019
c7bb353
Test errors in tests show up correctly in CI build output.
Oct 28, 2019
8aeab82
Revert the intentional test failures from the previous commit.
Oct 28, 2019
26 changes: 17 additions & 9 deletions build.cmd
@@ -388,7 +388,7 @@ if "%InstallPythonPackages%" == "True" (
echo "Installing python packages ... "
echo "#################################"
call "%PythonExe%" -m pip install --upgrade pip
call "%PythonExe%" -m pip install --upgrade nose pytest graphviz imageio pytest-cov "jupyter_client>=4.4.0" "nbconvert>=4.2.0"
call "%PythonExe%" -m pip install --upgrade nose pytest pytest-xdist graphviz imageio pytest-cov "jupyter_client>=4.4.0" "nbconvert>=4.2.0"

if %PythonVersion% == 2.7 (
call "%PythonExe%" -m pip install --upgrade pyzmq
@@ -414,19 +414,27 @@ set TestsPath1=%PackagePath%\tests
set TestsPath2=%__currentScriptDir%src\python\tests
set TestsPath3=%__currentScriptDir%src\python\tests_extended
set ReportPath=%__currentScriptDir%build\TestCoverageReport
call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
if errorlevel 1 (
goto :Exit_Error
)
call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath2%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
set NumConcurrentTests=%NUMBER_OF_PROCESSORS%

call "%PythonExe%" -m pytest -n %NumConcurrentTests% --verbose --maxfail=1000 --capture=sys "%TestsPath2%" "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
if errorlevel 1 (
goto :Exit_Error
:: Rerun any failed tests to give them one more
:: chance in case the errors were intermittent.
call "%PythonExe%" -m pytest -n %NumConcurrentTests% --last-failed --verbose --maxfail=1000 --capture=sys "%TestsPath2%" "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
if errorlevel 1 (
goto :Exit_Error
)
)

if "%RunExtendedTests%" == "True" (
call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
call "%PythonExe%" -m pytest -n %NumConcurrentTests% --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
if errorlevel 1 (
goto :Exit_Error
:: Rerun any failed tests to give them one more
:: chance in case the errors were intermittent.
call "%PythonExe%" -m pytest -n %NumConcurrentTests% --last-failed --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
if errorlevel 1 (
goto :Exit_Error
)
)
)

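The core change to both build scripts is a single parallel pytest run (-n workers via pytest-xdist) followed by one retry of only the failures. A minimal Python sketch of the same flow, assuming pytest and pytest-xdist are installed (pip install pytest pytest-xdist):

import subprocess
import sys

def run_tests(test_paths, num_workers):
    """Run the suite across num_workers processes; retry failures once."""
    base = [sys.executable, '-m', 'pytest', '-n', str(num_workers),
            '--verbose', '--maxfail=1000', '--capture=sys'] + list(test_paths)
    if subprocess.call(base) != 0:
        # --last-failed reruns only the tests pytest's cache recorded as
        # failing, giving intermittent failures one more chance before
        # the build is declared broken.
        return subprocess.call(base + ['--last-failed'])
    return 0

if __name__ == '__main__':
    sys.exit(run_tests(['src/python/tests'], num_workers=4))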
7 changes: 3 additions & 4 deletions build.sh
@@ -280,7 +280,7 @@ then
exit 1
fi
# Review: Adding "--upgrade" to pip install will cause problems when using Anaconda as the python distro because of Anaconda's quirks with pytest.
"${PythonExe}" -m pip install nose "pytest>=4.4.0" graphviz "pytest-cov>=2.6.1" "jupyter_client>=4.4.0" "nbconvert>=4.2.0"
"${PythonExe}" -m pip install nose "pytest>=4.4.0" pytest-xdist graphviz "pytest-cov>=2.6.1" "jupyter_client>=4.4.0" "nbconvert>=4.2.0"
if [ ${PythonVersion} = 2.7 ]
then
"${PythonExe}" -m pip install --upgrade pyzmq
@@ -307,8 +307,7 @@ then
TestsPath2=${__currentScriptDir}/src/python/tests
TestsPath3=${__currentScriptDir}/src/python/tests_extended
ReportPath=${__currentScriptDir}/build/TestCoverageReport
"${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath1}"
"${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath2}"
"${PythonExe}" -m pytest -n 4 --verbose --maxfail=1000 --capture=sys "${TestsPath2}" "${TestsPath1}"

if [ ${__runExtendedTests} = true ]
then
@@ -325,7 +324,7 @@
yum install glibc-devel -y
}
fi
"${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath3}"
"${PythonExe}" -m pytest -n 4 --verbose --maxfail=1000 --capture=sys "${TestsPath3}"
fi
fi

8 changes: 6 additions & 2 deletions src/python/nimbusml/pipeline.py
@@ -1529,10 +1529,14 @@ def _evaluation_infer(self, evaltype, label_column, group_id,
models_anomalydetectionevaluator(**params)])

elif type_ == 'ranking':
svd = "$scoredVectorData"
column = [OrderedDict(Source=group_id, Name=group_id)]
algo_args = dict(data=svd, output_data=svd, column=column)
algo_args = dict(
data="$scoredVectorData",
output_data="$scoredVectorData2",
column=column)
key_node = transforms_texttokeyconverter(**algo_args)

params['data'] = "$scoredVectorData2"
evaluate_node = models_rankingevaluator(
group_id_column=group_id, **params)
all_nodes.extend([
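The fix above gives the TextToKeyConverter node a distinct output variable in the entry-point graph. A before/after sketch of the wiring, reconstructed from the diff (the group_id value is illustrative):

from collections import OrderedDict

group_id = 'group'  # illustrative group-id column name
column = [OrderedDict(Source=group_id, Name=group_id)]

# Before: the converter read from and wrote to the same graph variable,
# so the key-converted output overwrote the scored data that the
# ranking evaluator still needed.
buggy_args = dict(data='$scoredVectorData',
                  output_data='$scoredVectorData',
                  column=column)

# After: the converter writes to a second variable, and the evaluator's
# params['data'] is pointed at it, so both datasets stay available.
fixed_args = dict(data='$scoredVectorData',
                  output_data='$scoredVectorData2',
                  column=column)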
10 changes: 5 additions & 5 deletions src/python/nimbusml/tests/model_summary/test_model_summary.py
@@ -71,7 +71,7 @@
GamBinaryClassifier(),
PcaAnomalyDetector(),
FactorizationMachineBinaryClassifier(),
KMeansPlusPlus(),
KMeansPlusPlus(n_clusters=2),
NaiveBayesClassifier(),
FastForestBinaryClassifier(number_of_trees=2),
FastForestRegressor(number_of_trees=2),
@@ -119,24 +119,24 @@ def test_summary_called_back_to_back_on_predictor(self):
ols.summary()

def test_pipeline_summary_is_refreshed_after_refitting(self):
predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0)
predictor = OrdinaryLeastSquaresRegressor()
pipeline = Pipeline([predictor])

pipeline.fit([0,1,2,3], [1,2,3,4])
summary1 = pipeline.summary()

pipeline.fit([0,1,2,3], [2,5,8,11])
pipeline.fit([0,1,2.5,3], [2,5,8,11])
summary2 = pipeline.summary()

self.assertFalse(summary1.equals(summary2))

def test_predictor_summary_is_refreshed_after_refitting(self):
predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0)
predictor = OrdinaryLeastSquaresRegressor()

predictor.fit([0,1,2,3], [1,2,3,4])
summary1 = predictor.summary()

predictor.fit([0,1,2,3], [2,5,8,11])
predictor.fit([0,1,2.5,3], [2,5,8,11])
summary2 = predictor.summary()

self.assertFalse(summary1.equals(summary2))
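A plausible reading of the refit-data change from [0,1,2,3] to [0,1,2.5,3] (an inference; the diff itself does not say): together with dropping normalize='No' and l2_regularization=0, it keeps the refit away from the OLS divide-by-zero this PR fixes, because targets that lie exactly on a line leave an unregularized fit with zero residual variance, which is the kind of quantity a summary's standard-error computation divides by. A quick check of the collinearity point with numpy:

import numpy as np

x = np.array([0, 1, 2, 3], dtype=float)
y = np.array([2, 5, 8, 11], dtype=float)
slope, intercept = np.polyfit(x, y, 1)
print(slope, intercept)              # 3.0 2.0: the data is exactly linear
print(y - (slope * x + intercept))   # residuals are all zero

x2 = np.array([0, 1, 2.5, 3], dtype=float)
slope2, intercept2 = np.polyfit(x2, y, 1)
print(y - (slope2 * x2 + intercept2))  # nonzero residuals: variance terms
                                       # in the summary stay well-defined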
39 changes: 24 additions & 15 deletions src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -5,6 +5,7 @@

import os
import pickle
import tempfile
import unittest

import numpy as np
@@ -32,6 +33,12 @@
(train, label) = get_X_y(train_file, label_column, sep=',')
(test, test_label) = get_X_y(test_file, label_column, sep=',')

def get_temp_file(suffix=None):
fd, file_name = tempfile.mkstemp(suffix=suffix)
fl = os.fdopen(fd, 'w')
fl.close()
return file_name


class TestLoadSave(unittest.TestCase):

@@ -48,7 +55,7 @@ def test_model_dataframe(self):
model_nimbusml.fit(train, label)

# Save with pickle
pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)

@@ -65,9 +72,10 @@ def test_model_dataframe(self):
test, test_label, output_scores=True)

# Save load with pipeline methods
model_nimbusml.save_model('model.nimbusml.m')
model_filename = get_temp_file(suffix='.m')
model_nimbusml.save_model(model_filename)
model_nimbusml_load = Pipeline()
model_nimbusml_load.load_model('model.nimbusml.m')
model_nimbusml_load.load_model(model_filename)

score1 = model_nimbusml.predict(test).head(5)
score2 = model_nimbusml_load.predict(test).head(5)
@@ -82,7 +90,7 @@ def test_model_dataframe(self):
model_nimbusml_load.sum().sum(),
decimal=2)

os.remove('model.nimbusml.m')
os.remove(model_filename)

def test_model_datastream(self):
model_nimbusml = Pipeline(
@@ -97,7 +105,7 @@ def test_model_datastream(self):
model_nimbusml.fit(train, label)

# Save with pickle
pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)

@@ -120,9 +128,10 @@
decimal=2)

# Save load with pipeline methods
model_nimbusml.save_model('model.nimbusml.m')
model_filename = get_temp_file(suffix='.m')
model_nimbusml.save_model(model_filename)
model_nimbusml_load = Pipeline()
model_nimbusml_load.load_model('model.nimbusml.m')
model_nimbusml_load.load_model(model_filename)

score1 = model_nimbusml.predict(test).head(5)
score2 = model_nimbusml_load.predict(test).head(5)
@@ -137,7 +146,7 @@ def test_model_datastream(self):
model_nimbusml_load.sum().sum(),
decimal=2)

os.remove('model.nimbusml.m')
os.remove(model_filename)

def test_pipeline_saves_complete_model_file_when_pickled(self):
model_nimbusml = Pipeline(
@@ -152,7 +161,7 @@ def test_pipeline_saves_complete_model_file_when_pickled(self):
model_nimbusml.fit(train, label)
metrics, score = model_nimbusml.test(test, test_label, output_scores=True)

pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')

# Save with pickle
with open(pickle_filename, 'wb') as f:
@@ -202,7 +211,7 @@ def test_unfitted_pickled_pipeline_can_be_fit(self):
shuffle=False,
number_of_threads=1))])

pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')

# Save with pickle
with open(pickle_filename, 'wb') as f:
@@ -234,7 +243,7 @@ def test_unpickled_pipeline_has_feature_contributions(self):
fc = model_nimbusml.get_feature_contributions(test)

# Save with pickle
pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)
# Unpickle model
@@ -260,7 +269,7 @@ def test_unpickled_predictor_has_feature_contributions(self):
fc = model_nimbusml.get_feature_contributions(test)

# Save with pickle
pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)
# Unpickle model
@@ -287,7 +296,7 @@ def test_pipeline_loaded_from_zip_has_feature_contributions(self):
fc = model_nimbusml.get_feature_contributions(test)

# Save the model to zip
model_filename = 'nimbusml_model.zip'
model_filename = get_temp_file(suffix='.zip')
model_nimbusml.save_model(model_filename)
# Load the model from zip
model_nimbusml_zip = Pipeline()
@@ -312,7 +321,7 @@ def test_predictor_loaded_from_zip_has_feature_contributions(self):
fc = model_nimbusml.get_feature_contributions(test)

# Save the model to zip
model_filename = 'nimbusml_model.zip'
model_filename = get_temp_file(suffix='.zip')
model_nimbusml.save_model(model_filename)
# Load the model from zip
model_nimbusml_zip = Pipeline()
@@ -347,7 +356,7 @@ def test_pickled_pipeline_with_predictor_model(self):
self.assertTrue(pipeline.predictor_model)
self.assertNotEqual(pipeline.model, pipeline.predictor_model)

pickle_filename = 'nimbusml_model.p'
pickle_filename = get_temp_file(suffix='.p')
with open(pickle_filename, 'wb') as f:
pickle.dump(pipeline, f)

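The recurring edit in this file swaps fixed names like 'nimbusml_model.p' for per-test temporary files. Under pytest-xdist, several worker processes share one working directory, so two tests writing the same hard-coded name can race each other. The new get_temp_file helper avoids that by letting tempfile.mkstemp create a uniquely named file atomically; a sketch of the same pattern with an illustrative usage (file contents are a stand-in for a pickled model):

import os
import tempfile

def get_temp_file(suffix=None):
    # mkstemp creates the file and returns an open low-level handle;
    # close it immediately so the test can re-open the path by name.
    fd, file_name = tempfile.mkstemp(suffix=suffix)
    fl = os.fdopen(fd, 'w')
    fl.close()
    return file_name

path = get_temp_file(suffix='.p')
try:
    with open(path, 'wb') as f:
        f.write(b'model bytes')  # stand-in for pickle.dump(model, f)
finally:
    os.remove(path)              # tests clean up their own temp files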
@@ -3,6 +3,7 @@
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------
import os
import tempfile
import unittest

from nimbusml import FileDataStream
@@ -16,6 +17,14 @@
from numpy.testing import assert_almost_equal
from pandas.testing import assert_frame_equal


def get_temp_model_file():
fd, file_name = tempfile.mkstemp(suffix='.zip')
fl = os.fdopen(fd, 'w')
fl.close()
return file_name


class TestPermutationFeatureImportance(unittest.TestCase):

@classmethod
@@ -65,7 +74,7 @@ def test_binary_classifier(self):
assert_almost_equal(self.binary_pfi['AreaUnderPrecisionRecallCurve'].sum(), -0.19365, 5)

def test_binary_classifier_from_loaded_model(self):
model_path = "model.zip"
model_path = get_temp_model_file()
self.binary_model.save_model(model_path)
loaded_model = Pipeline()
loaded_model.load_model(model_path)
@@ -81,7 +90,7 @@ def test_clasifier(self):
assert_almost_equal(self.classifier_pfi['PerClassLogLoss.1'].sum(), 0.419826, 6)

def test_classifier_from_loaded_model(self):
model_path = "model.zip"
model_path = get_temp_model_file()
self.classifier_model.save_model(model_path)
loaded_model = Pipeline()
loaded_model.load_model(model_path)
@@ -96,7 +105,7 @@ def test_regressor(self):
assert_almost_equal(self.regressor_pfi['RSquared'].sum(), -0.203612, 6)

def test_regressor_from_loaded_model(self):
model_path = "model.zip"
model_path = get_temp_model_file()
self.regressor_model.save_model(model_path)
loaded_model = Pipeline()
loaded_model.load_model(model_path)
@@ -113,7 +122,7 @@ def test_ranker(self):
assert_almost_equal(self.ranker_pfi['NDCG@3'].sum(), -0.236544, 6)

def test_ranker_from_loaded_model(self):
model_path = "model.zip"
model_path = get_temp_model_file()
self.ranker_model.save_model(model_path)
loaded_model = Pipeline()
loaded_model.load_model(model_path)