diff --git a/build.cmd b/build.cmd
index 8c371854..55d6e937 100644
--- a/build.cmd
+++ b/build.cmd
@@ -388,7 +388,7 @@ if "%InstallPythonPackages%" == "True" (
     echo "Installing python packages ... "
     echo "#################################"
     call "%PythonExe%" -m pip install --upgrade pip
-    call "%PythonExe%" -m pip install --upgrade nose pytest graphviz imageio pytest-cov "jupyter_client>=4.4.0" "nbconvert>=4.2.0"
+    call "%PythonExe%" -m pip install --upgrade nose pytest pytest-xdist graphviz imageio pytest-cov "jupyter_client>=4.4.0" "nbconvert>=4.2.0"
 
     if %PythonVersion% == 2.7 (
         call "%PythonExe%" -m pip install --upgrade pyzmq
@@ -414,19 +414,27 @@ set TestsPath1=%PackagePath%\tests
 set TestsPath2=%__currentScriptDir%src\python\tests
 set TestsPath3=%__currentScriptDir%src\python\tests_extended
 set ReportPath=%__currentScriptDir%build\TestCoverageReport
-call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
-if errorlevel 1 (
-    goto :Exit_Error
-)
-call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath2%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
+set NumConcurrentTests=%NUMBER_OF_PROCESSORS%
+
+call "%PythonExe%" -m pytest -n %NumConcurrentTests% --verbose --maxfail=1000 --capture=sys "%TestsPath2%" "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
 if errorlevel 1 (
-    goto :Exit_Error
+    :: Rerun any failed tests to give them one more
+    :: chance in case the errors were intermittent.
+    call "%PythonExe%" -m pytest -n %NumConcurrentTests% --last-failed --verbose --maxfail=1000 --capture=sys "%TestsPath2%" "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
+    if errorlevel 1 (
+        goto :Exit_Error
+    )
 )
 
 if "%RunExtendedTests%" == "True" (
-    call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
+    call "%PythonExe%" -m pytest -n %NumConcurrentTests% --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
     if errorlevel 1 (
-        goto :Exit_Error
+        :: Rerun any failed tests to give them one more
+        :: chance in case the errors were intermittent.
+        call "%PythonExe%" -m pytest -n %NumConcurrentTests% --last-failed --verbose --maxfail=1000 --capture=sys "%TestsPath3%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
+        if errorlevel 1 (
+            goto :Exit_Error
+        )
     )
 )
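Note on the runner change above: pytest-xdist's `-n` flag fans the suite out across worker processes (`%NUMBER_OF_PROCESSORS%` of them here, a fixed 4 in build.sh below), and a failing first pass is retried once with `--last-failed` so that intermittent failures do not fail the build outright. Below is a minimal sketch of the same run-then-retry pattern through pytest's Python API; the helper name and the worker count are illustrative, not part of the change.

```python
# Sketch of the run-then-retry pattern, assuming pytest and pytest-xdist
# are installed. "-n" comes from pytest-xdist; "--last-failed" is part of
# pytest's built-in cache plugin and reruns only the prior failures.
import pytest

def run_tests_with_retry(test_paths, workers=4):
    # First pass: run everything in parallel across worker processes.
    exit_code = pytest.main(['-n', str(workers), '--verbose'] + list(test_paths))
    if exit_code != 0:
        # Second pass: rerun only the failures, mirroring the scripts above,
        # so intermittent errors get one more chance.
        exit_code = pytest.main(
            ['-n', str(workers), '--last-failed', '--verbose'] + list(test_paths))
    return exit_code
```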
- "${PythonExe}" -m pip install nose "pytest>=4.4.0" graphviz "pytest-cov>=2.6.1" "jupyter_client>=4.4.0" "nbconvert>=4.2.0" + "${PythonExe}" -m pip install nose "pytest>=4.4.0" pytest-xdist graphviz "pytest-cov>=2.6.1" "jupyter_client>=4.4.0" "nbconvert>=4.2.0" if [ ${PythonVersion} = 2.7 ] then "${PythonExe}" -m pip install --upgrade pyzmq @@ -307,8 +307,7 @@ then TestsPath2=${__currentScriptDir}/src/python/tests TestsPath3=${__currentScriptDir}/src/python/tests_extended ReportPath=${__currentScriptDir}/build/TestCoverageReport - "${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath1}" - "${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath2}" + "${PythonExe}" -m pytest -n 4 --verbose --maxfail=1000 --capture=sys "${TestsPath2}" "${TestsPath1}" if [ ${__runExtendedTests} = true ] then @@ -325,7 +324,7 @@ then yum install glibc-devel -y } fi - "${PythonExe}" -m pytest --verbose --maxfail=1000 --capture=sys "${TestsPath3}" + "${PythonExe}" -m pytest -n 4 --verbose --maxfail=1000 --capture=sys "${TestsPath3}" fi fi diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 704622a4..71ee437d 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -1529,10 +1529,14 @@ def _evaluation_infer(self, evaltype, label_column, group_id, models_anomalydetectionevaluator(**params)]) elif type_ == 'ranking': - svd = "$scoredVectorData" column = [OrderedDict(Source=group_id, Name=group_id)] - algo_args = dict(data=svd, output_data=svd, column=column) + algo_args = dict( + data="$scoredVectorData", + output_data="$scoredVectorData2", + column=column) key_node = transforms_texttokeyconverter(**algo_args) + + params['data'] = "$scoredVectorData2" evaluate_node = models_rankingevaluator( group_id_column=group_id, **params) all_nodes.extend([ diff --git a/src/python/nimbusml/tests/model_summary/test_model_summary.py b/src/python/nimbusml/tests/model_summary/test_model_summary.py index 650238ae..87ab897a 100644 --- a/src/python/nimbusml/tests/model_summary/test_model_summary.py +++ b/src/python/nimbusml/tests/model_summary/test_model_summary.py @@ -71,7 +71,7 @@ GamBinaryClassifier(), PcaAnomalyDetector(), FactorizationMachineBinaryClassifier(), - KMeansPlusPlus(), + KMeansPlusPlus(n_clusters=2), NaiveBayesClassifier(), FastForestBinaryClassifier(number_of_trees=2), FastForestRegressor(number_of_trees=2), @@ -119,24 +119,24 @@ def test_summary_called_back_to_back_on_predictor(self): ols.summary() def test_pipeline_summary_is_refreshed_after_refitting(self): - predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0) + predictor = OrdinaryLeastSquaresRegressor() pipeline = Pipeline([predictor]) pipeline.fit([0,1,2,3], [1,2,3,4]) summary1 = pipeline.summary() - pipeline.fit([0,1,2,3], [2,5,8,11]) + pipeline.fit([0,1,2.5,3], [2,5,8,11]) summary2 = pipeline.summary() self.assertFalse(summary1.equals(summary2)) def test_predictor_summary_is_refreshed_after_refitting(self): - predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0) + predictor = OrdinaryLeastSquaresRegressor() predictor.fit([0,1,2,3], [1,2,3,4]) summary1 = predictor.summary() - predictor.fit([0,1,2,3], [2,5,8,11]) + predictor.fit([0,1,2.5,3], [2,5,8,11]) summary2 = predictor.summary() self.assertFalse(summary1.equals(summary2)) diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py index 19bc26ce..3807507e 100644 --- 
diff --git a/src/python/nimbusml/tests/model_summary/test_model_summary.py b/src/python/nimbusml/tests/model_summary/test_model_summary.py
index 650238ae..87ab897a 100644
--- a/src/python/nimbusml/tests/model_summary/test_model_summary.py
+++ b/src/python/nimbusml/tests/model_summary/test_model_summary.py
@@ -71,7 +71,7 @@
     GamBinaryClassifier(),
     PcaAnomalyDetector(),
     FactorizationMachineBinaryClassifier(),
-    KMeansPlusPlus(),
+    KMeansPlusPlus(n_clusters=2),
     NaiveBayesClassifier(),
     FastForestBinaryClassifier(number_of_trees=2),
     FastForestRegressor(number_of_trees=2),
@@ -119,24 +119,24 @@ def test_summary_called_back_to_back_on_predictor(self):
         ols.summary()
 
     def test_pipeline_summary_is_refreshed_after_refitting(self):
-        predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0)
+        predictor = OrdinaryLeastSquaresRegressor()
         pipeline = Pipeline([predictor])
 
         pipeline.fit([0,1,2,3], [1,2,3,4])
         summary1 = pipeline.summary()
 
-        pipeline.fit([0,1,2,3], [2,5,8,11])
+        pipeline.fit([0,1,2.5,3], [2,5,8,11])
         summary2 = pipeline.summary()
 
         self.assertFalse(summary1.equals(summary2))
 
     def test_predictor_summary_is_refreshed_after_refitting(self):
-        predictor = OrdinaryLeastSquaresRegressor(normalize='No', l2_regularization=0)
+        predictor = OrdinaryLeastSquaresRegressor()
 
         predictor.fit([0,1,2,3], [1,2,3,4])
         summary1 = predictor.summary()
 
-        predictor.fit([0,1,2,3], [2,5,8,11])
+        predictor.fit([0,1,2.5,3], [2,5,8,11])
         summary2 = predictor.summary()
 
         self.assertFalse(summary1.equals(summary2))
diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py
index 19bc26ce..3807507e 100644
--- a/src/python/nimbusml/tests/pipeline/test_load_save.py
+++ b/src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -5,6 +5,7 @@
 
 import os
 import pickle
+import tempfile
 import unittest
 
 import numpy as np
@@ -32,6 +33,12 @@
 (train, label) = get_X_y(train_file, label_column, sep=',')
 (test, test_label) = get_X_y(test_file, label_column, sep=',')
 
+def get_temp_file(suffix=None):
+    fd, file_name = tempfile.mkstemp(suffix=suffix)
+    fl = os.fdopen(fd, 'w')
+    fl.close()
+    return file_name
+
 
 class TestLoadSave(unittest.TestCase):
 
@@ -48,7 +55,7 @@ def test_model_dataframe(self):
         model_nimbusml.fit(train, label)
 
         # Save with pickle
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
         with open(pickle_filename, 'wb') as f:
             pickle.dump(model_nimbusml, f)
@@ -65,9 +72,10 @@ def test_model_dataframe(self):
             test, test_label, output_scores=True)
 
         # Save load with pipeline methods
-        model_nimbusml.save_model('model.nimbusml.m')
+        model_filename = get_temp_file(suffix='.m')
+        model_nimbusml.save_model(model_filename)
         model_nimbusml_load = Pipeline()
-        model_nimbusml_load.load_model('model.nimbusml.m')
+        model_nimbusml_load.load_model(model_filename)
 
         score1 = model_nimbusml.predict(test).head(5)
         score2 = model_nimbusml_load.predict(test).head(5)
@@ -82,7 +90,7 @@ def test_model_dataframe(self):
             model_nimbusml_load.sum().sum(),
             decimal=2)
 
-        os.remove('model.nimbusml.m')
+        os.remove(model_filename)
 
     def test_model_datastream(self):
         model_nimbusml = Pipeline(
@@ -97,7 +105,7 @@ def test_model_datastream(self):
         model_nimbusml.fit(train, label)
 
         # Save with pickle
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
         with open(pickle_filename, 'wb') as f:
             pickle.dump(model_nimbusml, f)
@@ -120,9 +128,10 @@ def test_model_datastream(self):
             decimal=2)
 
         # Save load with pipeline methods
-        model_nimbusml.save_model('model.nimbusml.m')
+        model_filename = get_temp_file(suffix='.m')
+        model_nimbusml.save_model(model_filename)
         model_nimbusml_load = Pipeline()
-        model_nimbusml_load.load_model('model.nimbusml.m')
+        model_nimbusml_load.load_model(model_filename)
 
         score1 = model_nimbusml.predict(test).head(5)
         score2 = model_nimbusml_load.predict(test).head(5)
@@ -137,7 +146,7 @@ def test_model_datastream(self):
             model_nimbusml_load.sum().sum(),
             decimal=2)
 
-        os.remove('model.nimbusml.m')
+        os.remove(model_filename)
 
     def test_pipeline_saves_complete_model_file_when_pickled(self):
         model_nimbusml = Pipeline(
@@ -152,7 +161,7 @@ def test_pipeline_saves_complete_model_file_when_pickled(self):
         model_nimbusml.fit(train, label)
         metrics, score = model_nimbusml.test(test, test_label,
                                              output_scores=True)
 
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
 
         # Save with pickle
         with open(pickle_filename, 'wb') as f:
@@ -202,7 +211,7 @@ def test_unfitted_pickled_pipeline_can_be_fit(self):
                 shuffle=False,
                 number_of_threads=1))])
 
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
 
         # Save with pickle
         with open(pickle_filename, 'wb') as f:
@@ -234,7 +243,7 @@ def test_unpickled_pipeline_has_feature_contributions(self):
         fc = model_nimbusml.get_feature_contributions(test)
 
         # Save with pickle
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
         with open(pickle_filename, 'wb') as f:
             pickle.dump(model_nimbusml, f)
         # Unpickle model
@@ -260,7 +269,7 @@ def test_unpickled_predictor_has_feature_contributions(self):
         fc = model_nimbusml.get_feature_contributions(test)
 
         # Save with pickle
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
         with open(pickle_filename, 'wb') as f:
             pickle.dump(model_nimbusml, f)
         # Unpickle model
@@ -287,7 +296,7 @@ def test_pipeline_loaded_from_zip_has_feature_contributions(self):
         fc = model_nimbusml.get_feature_contributions(test)
 
         # Save the model to zip
-        model_filename = 'nimbusml_model.zip'
+        model_filename = get_temp_file(suffix='.zip')
         model_nimbusml.save_model(model_filename)
         # Load the model from zip
         model_nimbusml_zip = Pipeline()
@@ -312,7 +321,7 @@ def test_predictor_loaded_from_zip_has_feature_contributions(self):
         fc = model_nimbusml.get_feature_contributions(test)
 
         # Save the model to zip
-        model_filename = 'nimbusml_model.zip'
+        model_filename = get_temp_file(suffix='.zip')
         model_nimbusml.save_model(model_filename)
         # Load the model from zip
         model_nimbusml_zip = Pipeline()
@@ -347,7 +356,7 @@ def test_pickled_pipeline_with_predictor_model(self):
         self.assertTrue(pipeline.predictor_model)
         self.assertNotEqual(pipeline.model, pipeline.predictor_model)
 
-        pickle_filename = 'nimbusml_model.p'
+        pickle_filename = get_temp_file(suffix='.p')
         with open(pickle_filename, 'wb') as f:
             pickle.dump(pipeline, f)
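The test changes above and below replace fixed artifact names written to the working directory ('nimbusml_model.p', 'model.nimbusml.m', 'model.zip') with unique per-test temporary files, avoiding collisions now that pytest-xdist runs tests concurrently. The idiom behind `get_temp_file()` here (and `get_temp_model_file()` in the next file) is `tempfile.mkstemp`, which atomically creates a uniquely named file and returns an open OS-level handle. A self-contained sketch, with `os.close(fd)` as an equivalent of the diff's `os.fdopen(fd, 'w').close()`:

```python
# mkstemp creates the file and returns (os-level fd, unique path).
# Closing the handle right away leaves an empty, uniquely named file
# that the test (or save_model) can overwrite safely, even with many
# pytest-xdist workers running in the same directory at once.
import os
import tempfile

def get_temp_file(suffix=None):
    fd, file_name = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return file_name

model_path = get_temp_file(suffix='.zip')
try:
    with open(model_path, 'wb') as f:
        f.write(b'...')  # stand-in for pipeline.save_model(model_path)
finally:
    os.remove(model_path)
```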
diff --git a/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py b/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py
index 347b2798..04f1bc35 100644
--- a/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py
+++ b/src/python/nimbusml/tests/pipeline/test_permutation_feature_importance.py
@@ -3,6 +3,7 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------------------------
 import os
+import tempfile
 import unittest
 
 from nimbusml import FileDataStream
@@ -16,6 +17,14 @@
 from numpy.testing import assert_almost_equal
 from pandas.testing import assert_frame_equal
 
+
+def get_temp_model_file():
+    fd, file_name = tempfile.mkstemp(suffix='.zip')
+    fl = os.fdopen(fd, 'w')
+    fl.close()
+    return file_name
+
+
 class TestPermutationFeatureImportance(unittest.TestCase):
 
     @classmethod
@@ -65,7 +74,7 @@ def test_binary_classifier(self):
         assert_almost_equal(self.binary_pfi['AreaUnderPrecisionRecallCurve'].sum(), -0.19365, 5)
 
     def test_binary_classifier_from_loaded_model(self):
-        model_path = "model.zip"
+        model_path = get_temp_model_file()
         self.binary_model.save_model(model_path)
         loaded_model = Pipeline()
         loaded_model.load_model(model_path)
@@ -81,7 +90,7 @@ def test_clasifier(self):
         assert_almost_equal(self.classifier_pfi['PerClassLogLoss.1'].sum(), 0.419826, 6)
 
     def test_classifier_from_loaded_model(self):
-        model_path = "model.zip"
+        model_path = get_temp_model_file()
         self.classifier_model.save_model(model_path)
         loaded_model = Pipeline()
         loaded_model.load_model(model_path)
@@ -96,7 +105,7 @@ def test_regressor(self):
         assert_almost_equal(self.regressor_pfi['RSquared'].sum(), -0.203612, 6)
 
     def test_regressor_from_loaded_model(self):
-        model_path = "model.zip"
+        model_path = get_temp_model_file()
         self.regressor_model.save_model(model_path)
         loaded_model = Pipeline()
         loaded_model.load_model(model_path)
@@ -113,7 +122,7 @@ def test_ranker(self):
         assert_almost_equal(self.ranker_pfi['NDCG@3'].sum(), -0.236544, 6)
 
     def test_ranker_from_loaded_model(self):
-        model_path = "model.zip"
+        model_path = get_temp_model_file()
         self.ranker_model.save_model(model_path)
         loaded_model = Pipeline()
         loaded_model.load_model(model_path)
diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py
index e40af618..d8a19e1f 100644
--- a/src/python/tests/test_estimator_checks.py
+++ b/src/python/tests/test_estimator_checks.py
@@ -7,7 +7,9 @@
 """
 import json
 import os
+import unittest
 
+from nimbusml.cluster import KMeansPlusPlus
 from nimbusml.decomposition import FactorizationMachineBinaryClassifier
 from nimbusml.ensemble import EnsembleClassifier
 from nimbusml.ensemble import EnsembleRegressor
@@ -155,6 +157,7 @@
         'check_estimators_overwrite_params, \
         check_estimator_sparse_data, check_estimators_pickle, '
         'check_estimators_nan_inf',
+    'OrdinaryLeastSquaresRegressor': 'check_fit2d_1sample'
 }
 
 OMITTED_CHECKS_TUPLE = (
@@ -196,6 +199,7 @@
     'EnsembleClassifier': EnsembleClassifier(num_models=3),
     'EnsembleRegressor': EnsembleRegressor(num_models=3),
     'FactorizationMachineBinaryClassifier': FactorizationMachineBinaryClassifier(shuffle=False),
+    'KMeansPlusPlus': KMeansPlusPlus(n_clusters=2),
     'LightGbmBinaryClassifier': LightGbmBinaryClassifier(
         minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
     'LightGbmClassifier': LightGbmClassifier(
@@ -247,13 +251,13 @@
 MULTI_OUTPUT.extend(MULTI_OUTPUT_EX)
 
-
-def my_import(name):
-    components = name.split('.')
-    mod = __import__(components[0])
-    for comp in components[1:]:
-        mod = getattr(mod, comp)
-    return mod
+skip_epoints = set([
+    'OneVsRestClassifier',
+    'TreeFeaturizer',
+    # skip SymSgdBinaryClassifier for now, because of crashes.
+    'SymSgdBinaryClassifier',
+    'DatasetTransformer'
+])
 
 
 def load_json(file_path):
@@ -263,97 +267,79 @@ def load_json(file_path):
         content_without_comments = '\n'.join(lines)
         return json.loads(content_without_comments)
 
+def get_epoints():
+    epoints = []
+    my_path = os.path.realpath(__file__)
+    my_dir = os.path.dirname(my_path)
+    manifest_diff_json = os.path.join(my_dir, '..', 'tools',
+                                      'manifest_diff.json')
+    manifest_diff = load_json(manifest_diff_json)
+    for e in manifest_diff['EntryPoints']:
+        if (e['NewName'] not in skip_epoints) and ('LightGbm' not in e['NewName']):
+            epoints.append((e['Module'], e['NewName']))
+
+    return epoints
 
-skip_epoints = set([
-    'OneVsRestClassifier',
-    'TreeFeaturizer',
-    # skip SymSgdBinaryClassifier for now, because of crashes.
-    'SymSgdBinaryClassifier',
-    'DatasetTransformer'
-])
 
-epoints = []
-my_path = os.path.realpath(__file__)
-my_dir = os.path.dirname(my_path)
-manifest_diff_json = os.path.join(my_dir, '..', 'tools',
-                                  'manifest_diff.json')
-manifest_diff = load_json(manifest_diff_json)
-for e in manifest_diff['EntryPoints']:
-    if e['NewName'] not in skip_epoints:
-        epoints.append((e['Module'], e['NewName']))
+class TestEstimatorChecks(unittest.TestCase):
+    # This method is a static method of the class
+    # because there were pytest fixture related
+    # issues when the method was in the global scope.
+    @staticmethod
+    def generate_test_method(epoint):
+        def method(self):
+            failed_checks = set()
+            passed_checks = set()
+            class_name = epoint[1]
+            print("\n======== now Estimator is %s =========== " % class_name)
 
-all_checks = {}
-all_failed_checks = {}
-all_passed_checks = {}
-total_checks_passed = 0
+            mod = __import__('nimbusml.'
+                             + epoint[0], fromlist=[str(class_name)])
+            the_class = getattr(mod, class_name)
+            if class_name in INSTANCES:
+                estimator = INSTANCES[class_name]
+            else:
+                estimator = the_class()
 
-print("total entrypoints: {}", len(epoints))
+            if estimator._use_single_input_as_string():
+                estimator = estimator << 'F0'
 
-for e in epoints:
-    checks = set()
-    failed_checks = set()
-    passed_checks = set()
-    class_name = e[1]
-    print("======== now Estimator is %s =========== " % class_name)
-    # skip LighGbm for now, because of random crashes.
-    if 'LightGbm' in class_name:
-        continue
+            for check in _yield_all_checks(class_name, estimator):
+                # Skip check_dict_unchanged for estimators which
+                # update the classes_ attribute. For more details
+                # see https://github.com/microsoft/NimbusML/pull/200
+                if (check.__name__ == 'check_dict_unchanged') and \
+                    (hasattr(estimator, 'predict_proba') or
+                     hasattr(estimator, 'decision_function')):
+                    continue
 
-    mod = __import__('nimbusml.' + e[0], fromlist=[str(class_name)])
-    the_class = getattr(mod, class_name)
-    if class_name in INSTANCES:
-        estimator = INSTANCES[class_name]
-    else:
-        estimator = the_class()
+                if check.__name__ in OMITTED_CHECKS_ALWAYS:
+                    continue
+                if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
+                    continue
+                if class_name in OMITTED_CHECKS and check.__name__ in \
+                        OMITTED_CHECKS[class_name]:
+                    continue
+                if class_name in OMITTED_CHECKS_TUPLE[0] and check.__name__ in \
+                        OMITTED_CHECKS_TUPLE[1]:
+                    continue
 
-    if estimator._use_single_input_as_string():
-        estimator = estimator << 'F0'
+                try:
+                    check(class_name, estimator.clone())
+                    passed_checks.add(check.__name__)
+                except Exception as e:
+                    failed_checks.add(check.__name__)
 
-    for check in _yield_all_checks(class_name, estimator):
-        # Skip check_dict_unchanged for estimators which
-        # update the classes_ attribute. For more details
-        # see https://github.com/microsoft/NimbusML/pull/200
-        if (check.__name__ == 'check_dict_unchanged') and \
-            (hasattr(estimator, 'predict_proba') or
-             hasattr(estimator, 'decision_function')):
-            continue
+            if len(failed_checks) > 0:
+                self.fail(msg=str(failed_checks))
 
-        if check.__name__ in OMITTED_CHECKS_ALWAYS:
-            continue
-        if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
-            continue
-        if class_name in OMITTED_CHECKS and check.__name__ in \
-                OMITTED_CHECKS[class_name]:
-            continue
-        if class_name in OMITTED_CHECKS_TUPLE[0] and check.__name__ in \
-                OMITTED_CHECKS_TUPLE[1]:
-            continue
-        checks.add(check.__name__)
-        try:
-            check(class_name, estimator.clone())
-            passed_checks.add(check.__name__)
-            total_checks_passed = total_checks_passed + 1
-        except Exception as e:
-            failed_checks.add(check.__name__)
+        return method
 
-    if frozenset(checks) not in all_checks:
-        all_checks[frozenset(checks)] = []
-    all_checks[frozenset(checks)].append(class_name)
 
-    if len(failed_checks) > 0:
-        if frozenset(failed_checks) not in all_failed_checks:
-            all_failed_checks[frozenset(failed_checks)] = []
-        all_failed_checks[frozenset(failed_checks)].append(class_name)
+for epoint in get_epoints():
+    test_name = 'test_%s' % epoint[1].lower()
+    method = TestEstimatorChecks.generate_test_method(epoint)
+    setattr(TestEstimatorChecks, test_name, method)
 
-    if frozenset(passed_checks) not in all_passed_checks:
-        all_passed_checks[frozenset(passed_checks)] = []
-    all_passed_checks[frozenset(passed_checks)].append(class_name)
 
-if len(all_failed_checks) > 0:
-    print("Following tests failed for components:")
-    for key, value in all_failed_checks.items():
-        print('========================')
-        print(key)
-        print(value)
-    raise RuntimeError("estimator checks failed")
-print("success, total checks passed %s ", total_checks_passed)
+if __name__ == '__main__':
+    unittest.main()
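This rewrite, like the test_docs_example.py rewrite below, trades one monolithic test loop for unittest methods generated at import time and attached with `setattr`, so pytest can collect, parallelize, and rerun each entry point or example individually. A self-contained sketch of the pattern; the placeholder strings stand in for the real work items from `get_epoints()` or `get_examples()`.

```python
# Build one test method per work item and attach it to a TestCase
# subclass at import time, so each item becomes an individually
# collectable (and --last-failed rerunnable) pytest test.
import unittest

class TestGenerated(unittest.TestCase):
    # Kept as a staticmethod, mirroring the diff's note about pytest
    # fixture issues when the factory lived at module scope.
    @staticmethod
    def generate_test_method(item):
        def method(self):
            # Stand-in for the real per-item checks.
            self.assertIsInstance(item, str)
        return method

for item in ['alpha', 'beta']:  # stand-in for get_epoints()/get_examples()
    setattr(TestGenerated, 'test_%s' % item,
            TestGenerated.generate_test_method(item))

if __name__ == '__main__':
    unittest.main()
```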
diff --git a/src/python/tests_extended/test_docs_example.py b/src/python/tests_extended/test_docs_example.py
index 1b169fe7..d60d4c34 100644
--- a/src/python/tests_extended/test_docs_example.py
+++ b/src/python/tests_extended/test_docs_example.py
@@ -6,78 +6,126 @@
 import platform
 import subprocess
 import sys
-import time
 import unittest
 
 import six
 
 from nimbusml import __file__ as myfile
 
-class TestDocsExamples(unittest.TestCase):
-
-    def test_examples(self):
-        this = os.path.abspath(os.path.dirname(__file__))
-        fold = os.path.normpath(
-            os.path.join(
-                this,
-                '..',
-                'nimbusml',
-                'examples'))
-        if not os.path.exists(fold):
-            raise FileNotFoundError("Unable to find '{0}'.".format(fold))
-
-        fold_files = [(fold, _) for _ in os.listdir(
-            fold) if os.path.splitext(_)[-1] == '.py']
-        if len(fold_files) == 0:
-            raise FileNotFoundError(
-                "Unable to find examples in '{0}'".format(fold))
-
-        # also include the 'examples_from_dataframe' files
-        fold_df = os.path.join(fold, 'examples_from_dataframe')
-        fold_files_df = [(fold_df, _) for _ in os.listdir(
-            fold_df) if os.path.splitext(_)[-1] == '.py']
-
-        # merge details of all examples into one list
-        fold_files.extend(fold_files_df)
-        fold_files.sort()
-
-        modpath = os.path.abspath(os.path.dirname(myfile))
-        modpath = os.path.normpath(os.path.join(os.path.join(modpath), '..'))
-        os.environ['PYTHONPATH'] = modpath
-        os.environ['PYTHONIOENCODING'] = 'UTF-8'
-
-        ran = 0
-        excs = []
-
-        for i, (fold, name) in enumerate(fold_files):
+exps = [
+    "Exception: 'Missing 'English.tok'",
+    "Missing resource for SSWE",
+    "Model file for Word Embedding transform could not "
+    "be found",
+    "was already trained. Its coefficients will be "
+    "overwritten. Use clone() to get an untrained "
+    "version of it.",
+    "LdaNative.dll",
+    "CacheClassesFromAssembly",
+    "Your CPU supports instructions that this TensorFlow",
+    "CacheClassesFromAssembly: can't map name "
+    "OLSLinearRegression to Void, already mapped to Void",
+    # TensorFlowScorer.py
+    "tensorflow/compiler/xla/service/service.cc:168] XLA service",
+    "tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device",
+    "tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency:",
+    "tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU",
+    # Binner.py
+    "from collections import Mapping, defaultdict",
+    "DeprecationWarning: Using or importing the ABCs",
+    # BootStrapSample.py
+    "DeprecationWarning: the imp module is deprecated",
+    # PipelineWithGridSearchCV2.py
+    "FutureWarning: You should specify a value for 'cv'",
+    # PipelineWithGridSearchCV2.py
+    "DeprecationWarning: The default of the 'iid' parameter",
+    # PcaAnomalyDetector.py
+    "UserWarning: Model",
+    # FastLinearClassifier_iris_df.py
+    "FutureWarning: elementwise comparison failed",
+    # PcaAnomalyDetector_df.py
+    "FutureWarning: Sorting because non-concatenation axis",
+    # Image.py
+    "Unable to revert mtime: /Library/Fonts",
+    "Fontconfig error: Cannot load default config file",
+    ]
+
+if sys.version_info[:2] <= (3, 6):
+    # This warning is new but it does not break any
+    # other unit tests.
+    # (3, 5) -> (3, 6) for tests on mac
+    # TODO: Investigate.
+    exps.append("RuntimeWarning: numpy.dtype size changed")
+
+
+def get_examples():
+    this = os.path.abspath(os.path.dirname(__file__))
+    folder = os.path.normpath(
+        os.path.join(
+            this,
+            '..',
+            'nimbusml',
+            'examples'))
+    if not os.path.exists(folder):
+        raise FileNotFoundError("Unable to find '{0}'.".format(folder))
+
+    folder_files = [(folder, _) for _ in os.listdir(
+        folder) if os.path.splitext(_)[-1] == '.py']
+    if len(folder_files) == 0:
+        raise FileNotFoundError(
+            "Unable to find examples in '{0}'".format(folder))
+
+    # also include the 'examples_from_dataframe' files
+    folder_df = os.path.join(folder, 'examples_from_dataframe')
+    folder_files_df = [(folder_df, _) for _ in os.listdir(
+        folder_df) if os.path.splitext(_)[-1] == '.py']
+
+    # merge details of all examples into one list
+    folder_files.extend(folder_files_df)
+    folder_files.sort()
+
+    examples = []
+    for folder, name in folder_files:
+        if name in [
+                '__init__.py',
+                # Bug todo: CustomStopWordsRemover fails on ML.NET side
+                'NGramFeaturizer2.py']:
+            continue
+        # skip for all linux tests, mac is ok
+        if os.name == "posix" and platform.linux_distribution()[0] != '':
             if name in [
-                # Bug 294481: CharTokenizer_df fails
-                # with error about variable length vector
-                'CharTokenizer_df.py',
-                # Bug todo: CustomStopWordsRemover fails on ML.NET side
-                'NGramFeaturizer2.py',
-            ]:
+                    # SymSgdNative fails to load on linux
+                    'SymSgdBinaryClassifier.py',
+                    'SymSgdBinaryClassifier_infert_df.py',
+                    # MICROSOFTML_RESOURCE_PATH needs to be setup on linux
+                    'CharTokenizer.py',
+                    'WordEmbedding.py',
+                    'WordEmbedding_df.py',
+                    'NaiveBayesClassifier_df.py']:
                 continue
-            # skip for all linux tests, mac is ok
-            if os.name == "posix" and platform.linux_distribution()[0] != '':
-                if name in [
-                    # SymSgdNative fails to load on linux
-                    'SymSgdBinaryClassifier.py',
-                    'SymSgdBinaryClassifier_infert_df.py',
-                    # MICROSOFTML_RESOURCE_PATH needs to be setup on linux
-                    'CharTokenizer.py',
-                    'WordEmbedding.py',
-                    'WordEmbedding_df.py',
-                    'NaiveBayesClassifier_df.py'
-                ]:
-                    continue
-
-            full = os.path.join(fold, name)
-            cmd = '"{0}" -u "{1}"'.format(
-                sys.executable.replace(
-                    'w.exe', '.exe'), full)
-
-            begin = time.clock()
+
+        examples.append((folder, name))
+
+    return examples
+
+
+class TestDocsExamples(unittest.TestCase):
+    # This method is a static method of the class
+    # because there were pytest fixture related
+    # issues when the method was in the global scope.
+    @staticmethod
+    def generate_test_method(folder, name):
+        def method(self):
+            print("\n======== Example: %s =========== " % name)
+
+            modpath = os.path.abspath(os.path.dirname(myfile))
+            modpath = os.path.normpath(os.path.join(os.path.join(modpath), '..'))
+            os.environ['PYTHONPATH'] = modpath
+            os.environ['PYTHONIOENCODING'] = 'UTF-8'
+
+            full = os.path.join(folder, name)
+            python_exe = sys.executable.replace('w.exe', '.exe')
+            cmd = '"{0}" -u "{1}"'.format(python_exe, full)
+
             if six.PY2:
                 FNULL = open(os.devnull, 'w')
                 p = subprocess.Popen(
@@ -88,59 +136,14 @@ def test_examples(self):
                     shell=True)
                 stdout, stderr = p.communicate()
             else:
-                with subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                with subprocess.Popen(cmd,
+                                      stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       stdin=subprocess.DEVNULL,
                                       shell=True) as p:
                     stdout, stderr = p.communicate()
-            total = time.clock() - begin
-            stderr = stderr.decode('utf-8', errors='ignore').strip(
-                "\n\r\t ")
-            stdout = stdout.decode('utf-8', errors='ignore').strip(
-                "\n\r\t ")
-            exps = [
-                "Exception: 'Missing 'English.tok'",
-                "Missing resource for SSWE",
-                "Model file for Word Embedding transform could not "
-                "be found",
-                "was already trained. Its coefficients will be "
-                "overwritten. Use clone() to get an untrained "
-                "version of it.",
-                "LdaNative.dll",
-                "CacheClassesFromAssembly",
-                "Your CPU supports instructions that this TensorFlow",
-                "CacheClassesFromAssembly: can't map name "
-                "OLSLinearRegression to Void, already mapped to Void",
-                # TensorFlowScorer.py
-                "tensorflow/compiler/xla/service/service.cc:168] XLA service",
-                "tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device",
-                "tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency:",
-                "tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU",
-                # Binner.py
-                "from collections import Mapping, defaultdict",
-                "DeprecationWarning: Using or importing the ABCs",
-                # BootStrapSample.py
-                "DeprecationWarning: the imp module is deprecated",
-                # PipelineWithGridSearchCV2.py
-                "FutureWarning: You should specify a value for 'cv'",
-                # PipelineWithGridSearchCV2.py
-                "DeprecationWarning: The default of the 'iid' parameter",
-                # PcaAnomalyDetector.py
-                "UserWarning: Model",
-                # FastLinearClassifier_iris_df.py
-                "FutureWarning: elementwise comparison failed",
-                # PcaAnomalyDetector_df.py
-                "FutureWarning: Sorting because non-concatenation axis",
-                # Image.py
-                "Unable to revert mtime: /Library/Fonts",
-                "Fontconfig error: Cannot load default config file",
-            ]
-            if sys.version_info[:2] <= (3, 6):
-                # This warning is new but it does not break any
-                # other unit tests.
-                # (3, 5) -> (3, 6) for tests on mac
-                # TODO: Investigate.
-                exps.append("RuntimeWarning: numpy.dtype size changed")
+            stderr = stderr.decode('utf-8', errors='ignore').strip("\n\r\t ")
+            stdout = stdout.decode('utf-8', errors='ignore').strip("\n\r\t ")
 
             errors = None
             if stderr != '':
@@ -149,25 +152,6 @@ def test_examples(self):
                     errors = [_ for _ in errors if exp not in _]
 
             if errors and (len(errors) > 1 or
                            (len(errors) == 1 and errors[0] != '')):
-                excs.append(RuntimeError(
-                    "Issue with\n  File '{0}'\n--CMD\n{1}\n--ERR\n{2}\n--OUT\n"
-                    "{3}\n--".format(full, cmd, '\n'.join(errors), stdout)))
-                print("{0}/{1} FAIL - '{2}' in {3}s".format(i + 1, len(
-                    fold_files), name, total))
-                if len(excs) > 1:
-                    for ex in excs:
-                        print('--------------')
-                        print(ex)
-                    raise excs[-1]
-            else:
-                print("{0}/{1} OK - '{2}' in "
-                      "{3}s".format(i + 1, len(fold_files), name, total))
-                ran += 1
-
-        if len(excs) > 0:
-            for ex in excs[1:]:
-                print('--------------')
-                print(ex)
                 import numpy
                 import pandas
                 import sklearn
@@ -176,10 +160,18 @@ def test_examples(self):
                     sklearn.__version__,
                     numpy.__version__]
                 print("DEBUG VERSIONS", versions)
-            raise excs[0]
-        elif ran == 0:
-            raise Exception(
-                "No example was run in path '{0}'.".format(fold))
+
+            raise RuntimeError(
+                "Issue with\n  File '{0}'\n--CMD\n{1}\n--ERR\n{2}\n--OUT\n"
+                "{3}\n--".format(full, cmd, '\n'.join(errors), stdout))
+
+        return method
+
+
+for example in get_examples():
+    test_name = 'test_%s' % example[1].replace('.py', '').lower()
+    method = TestDocsExamples.generate_test_method(*example)
+    setattr(TestDocsExamples, test_name, method)
 
 
 if __name__ == "__main__":