diff --git a/docs/release-notes/release-1.6.0.md b/docs/release-notes/release-1.6.0.md new file mode 100644 index 00000000..fa5ef3d8 --- /dev/null +++ b/docs/release-notes/release-1.6.0.md @@ -0,0 +1,42 @@ +# [NimbusML](https://docs.microsoft.com/en-us/nimbusml/overview) 1.6.0 + +## **New Features** + +- **Initial implementation of NGramExtractor.** + + [PR#320](https://github.com/microsoft/NimbusML/pull/320) + Produces a bag of counts of n-grams (sequences of consecutive values of length 1-n) + in a given vector of keys. It does so by building a dictionary of n-grams and using + the id in the dictionary as the index in the bag. + +- **Update Manifest Generator.** + + [PR#329](https://github.com/microsoft/NimbusML/pull/329) + Update the Manifest Generator project to work with the latest changes and incorporate + it into the build process. + +## **Bug Fixes** + +None. + +## **Enhancements** + +- **Update To ML.Net Version 1.4.0.** + + [PR#353](https://github.com/microsoft/NimbusML/pull/353) + +- **Update To Latest Version Of DataPrep.** + + [PR#379](https://github.com/microsoft/NimbusML/pull/379) + +- **Update Tests To Execute In Parallel.** + + [PR#331](https://github.com/microsoft/NimbusML/pull/331) + +## **Documentation and Samples** + +None. + +## **Remarks** + +None. diff --git a/release-next.md b/release-next.md index c6cfb7ed..c6d1ec43 100644 --- a/release-next.md +++ b/release-next.md @@ -2,18 +2,7 @@ ## **New Features** -- **Initial implementation of NGramExtractor.** - - [PR#320](https://github.com/microsoft/NimbusML/pull/320) - Produces a bag of counts of n-grams (sequences of consecutive values of length 1-n) - in a given vector of keys. It does so by building a dictionary of n-grams and using - the id in the dictionary as the index in the bag. 
- -- **Update Manifest Generator.** - - [PR#329](https://github.com/microsoft/NimbusML/pull/329) - Update the Manifest Generator project to work with the latest changes and incorporate - it in to the build process. +None. ## **Bug Fixes** @@ -21,9 +10,7 @@ None. ## **Enhancements** -- **Update Tests To Execute In Parallel.** - - [PR#331](https://github.com/microsoft/NimbusML/pull/331) +None. ## **Documentation and Samples** diff --git a/src/DotNetBridge/DotNetBridge.csproj b/src/DotNetBridge/DotNetBridge.csproj index 9d108c71..38fcba37 100644 --- a/src/DotNetBridge/DotNetBridge.csproj +++ b/src/DotNetBridge/DotNetBridge.csproj @@ -43,7 +43,7 @@ - + diff --git a/src/Platforms/build.csproj b/src/Platforms/build.csproj index c9272462..f027dc30 100644 --- a/src/Platforms/build.csproj +++ b/src/Platforms/build.csproj @@ -22,7 +22,7 @@ - + diff --git a/src/python/docs/docstrings/DssmFeaturizer.txt b/src/python/docs/docstrings/DssmFeaturizer.txt deleted file mode 100644 index d71d2540..00000000 --- a/src/python/docs/docstrings/DssmFeaturizer.txt +++ /dev/null @@ -1,32 +0,0 @@ - """ - - The input to this transform is text. It applies a pretrained DSSM - featurizer and outputs semantic embeddings for - the input vectors and a cosine similarity computed between the query - and document columns. - - .. remarks:: - DSSM is a neural network algorithm that produces feature embeddings - for key-value string pairs. It is trained - using a dataset consisting of positive key-value pairs, from which - the original rows are used as correct - examples, and the strings are recombined to produce adversarial, - incorrect training examples. Some example of - key-value pairs include search query and clicked document title text, - search query and clicked ad content text, - Search using Clickthrough Data `_ , an MSR publication. - - - .. seealso:: - :py:class:`NGramFeaturizer `, - :py:class:`Sentiment `, - :py:class:`SsweEmbedding `, - :py:class:`WordEmbedding `. - - .. 
index:: transform, featurizer, text - - Example: - .. literalinclude:: /../nimbusml/examples/DssmFeaturizer.py - :language: python - """ \ No newline at end of file diff --git a/src/python/docs/docstrings/SsweEmbedding.txt b/src/python/docs/docstrings/SsweEmbedding.txt index 12a4c509..4e6c56f9 100644 --- a/src/python/docs/docstrings/SsweEmbedding.txt +++ b/src/python/docs/docstrings/SsweEmbedding.txt @@ -63,7 +63,6 @@ .. seealso:: :py:class:`NGramFeaturizer `, - :py:class:`DssmFeaturizer `, :py:class:`Sentiment `, :py:class:`WordEmbedding `. diff --git a/src/python/docs/sphinx/modules/feature_extraction/text/dssmfeaturizer.rst b/src/python/docs/sphinx/modules/feature_extraction/text/dssmfeaturizer.rst deleted file mode 100644 index 7aa663ee..00000000 --- a/src/python/docs/sphinx/modules/feature_extraction/text/dssmfeaturizer.rst +++ /dev/null @@ -1,4 +0,0 @@ -`nimbusml.feature_extraction.text.DssmFeaturizer` -=========================================================== - -.. autoclass:: nimbusml.feature_extraction.text.DssmFeaturizer diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index f43ade51..5060cdd4 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -959,7 +959,6 @@ - diff --git a/src/python/nimbusml/_pipeline.py b/src/python/nimbusml/_pipeline.py index 8a2d88af..2a6c06fd 100644 --- a/src/python/nimbusml/_pipeline.py +++ b/src/python/nimbusml/_pipeline.py @@ -2543,7 +2543,7 @@ def summary(self, verbose=0, **params): if len(self.steps) > 0 and not isinstance( self.last_node, BasePredictor): raise ValueError( - "Summary is availabe only for predictor types, instead " + "Summary is available only for predictor types, instead " "got " + self.last_node.type) @@ -2581,6 +2581,10 @@ def summary(self, verbose=0, **params): self._run_time = time.time() - start_time raise e + # .summary() is not supported if the size of summary_data + # is exactly 1 (if only PredictorName is in summary_data) + if summary_data.size == 1 and 
summary_data.columns.values == ["PredictorName"]: + raise TypeError("One or more predictors in this pipeline do not support the .summary() function.") self.model_summary = summary_data # stop the clock diff --git a/src/python/nimbusml/tests/model_summary/test_model_summary.py b/src/python/nimbusml/tests/model_summary/test_model_summary.py index 87ab897a..3d0c659c 100644 --- a/src/python/nimbusml/tests/model_summary/test_model_summary.py +++ b/src/python/nimbusml/tests/model_summary/test_model_summary.py @@ -66,25 +66,25 @@ #SymSgdBinaryClassifier(), OrdinaryLeastSquaresRegressor(), PoissonRegressionRegressor(), - OneVsRestClassifier(FastLinearBinaryClassifier()), GamRegressor(), GamBinaryClassifier(), PcaAnomalyDetector(), - FactorizationMachineBinaryClassifier(), - KMeansPlusPlus(n_clusters=2), - NaiveBayesClassifier(), FastForestBinaryClassifier(number_of_trees=2), FastForestRegressor(number_of_trees=2), FastTreesBinaryClassifier(number_of_trees=2), FastTreesRegressor(number_of_trees=2), FastTreesTweedieRegressor(number_of_trees=2), LightGbmRegressor(number_of_iterations=2), - LightGbmClassifier(), LightGbmBinaryClassifier(number_of_iterations=2) ] learners_not_supported = [ - #PcaTransformer(), # REVIEW: crashes + FactorizationMachineBinaryClassifier(), + OneVsRestClassifier(FastLinearBinaryClassifier()), + FactorizationMachineBinaryClassifier(), + KMeansPlusPlus(n_clusters=2), + NaiveBayesClassifier(), + LightGbmClassifier() ] @@ -98,7 +98,6 @@ def test_model_summary(self): pipeline.fit(train_stream, label_column) pipeline.summary() - @unittest.skip("No unsupported learners") def test_model_summary_not_supported(self): for learner in learners_not_supported: pipeline = Pipeline( @@ -107,6 +106,23 @@ def test_model_summary_not_supported(self): pipeline.fit(train_stream, label_column) assert_raises(TypeError, pipeline.summary) + def test_model_summary_not_supported_specific(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path, 
sep=',', + names={0: 'row_num', 5: 'case'}) + pipeline = Pipeline([ + OneHotVectorizer(columns={'edu': 'education'}), + FactorizationMachineBinaryClassifier(feature=['induced', 'edu', 'parity'], + label='case') + ]) + pipeline.fit(data) + try: + pipeline.summary() + except TypeError as e: + self.assertEqual(e.args[0], "One or more predictors in this pipeline do not support the .summary() function.") + else: + assert False + def test_summary_called_back_to_back_on_predictor(self): """ When a predictor is fit without using a Pipeline, diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py index d8a19e1f..df7c1e87 100644 --- a/src/python/tests/test_estimator_checks.py +++ b/src/python/tests/test_estimator_checks.py @@ -161,7 +161,7 @@ } OMITTED_CHECKS_TUPLE = ( - 'OneHotHashVectorizer, FromKey, DssmFeaturizer, DnnFeaturizer, ' + 'OneHotHashVectorizer, FromKey, DnnFeaturizer, ' 'PixelExtractor, Loader, Resizer, \ GlobalContrastRowScaler, PcaTransformer, ' 'ColumnConcatenator, Sentiment, CharTokenizer, LightLda, ' diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json index a8cd262e..d56f33de 100644 --- a/src/python/tools/manifest_diff.json +++ b/src/python/tools/manifest_diff.json @@ -176,7 +176,6 @@ "Common.MakeArrayIDataView", "Common.MakeArrayIPredictorModel", "CountTable.Create", - "Dssm.Trigram", "EnsembleCreator.CreateAnomalyPipelineEnsemble", "EnsembleCreator.CreateBinaryEnsemble", "EnsembleCreator.CreateBinaryPipelineEnsemble",