diff --git a/.gitignore b/.gitignore index 01738127..1c981aee 100644 --- a/.gitignore +++ b/.gitignore @@ -348,9 +348,3 @@ data.csv data.txt /build/TestCoverageReport - -# The folder generated by make_yaml.bat -*_build -*mymodeluci.zip -build/sphinxmdoutput-0.2.4.1-py3-none-any.whl -*build \ No newline at end of file diff --git a/README.md b/README.md index 2f4207a5..5fff9d39 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ To build `nimbusml` from source please visit our [developer guide](docs/develope ## Contributing -The contributions guide can be found [here](CONTRIBUTING.md). Given the experimental nature of this project, support will be provided on a best-effort basis. We suggest opening an issue for discussion before starting a PR with big changes. +The contributions guide can be found [here](CONTRIBUTING.md). ## Support diff --git a/src/python/docs/docstrings/Dart.txt b/src/python/docs/docstrings/Dart.txt index faa504e0..d037b6f7 100644 --- a/src/python/docs/docstrings/Dart.txt +++ b/src/python/docs/docstrings/Dart.txt @@ -7,9 +7,9 @@ `_ is an ensemble method of boosted regression trees. The Dropouts meet Multiple Additive Regression - Trees (DART) employs dropouts in MART and overcomes the issues of over- + Trees (DART) employs dropouts in MART and overcomes the issues of over- specialization of MART, - achiving better performance in many tasks. + achieving better performance in many tasks. **Reference** diff --git a/src/python/docs/docstrings/FastLinearBinaryClassifier.txt b/src/python/docs/docstrings/FastLinearBinaryClassifier.txt index c956db97..a16893e8 100644 --- a/src/python/docs/docstrings/FastLinearBinaryClassifier.txt +++ b/src/python/docs/docstrings/FastLinearBinaryClassifier.txt @@ -1,7 +1,7 @@ """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear binary classification. .. 
remarks:: ``FastLinearBinaryClassifier`` is a trainer based on the Stochastic diff --git a/src/python/docs/docstrings/FastLinearClassifier.txt b/src/python/docs/docstrings/FastLinearClassifier.txt index 32a3d35d..d9984dd5 100644 --- a/src/python/docs/docstrings/FastLinearClassifier.txt +++ b/src/python/docs/docstrings/FastLinearClassifier.txt @@ -1,6 +1,7 @@ """ - Train an SDCA multi class model + A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer for + multi class classification. .. remarks:: ``FastLinearClassifier`` is a trainer based on the Stochastic Dual diff --git a/src/python/docs/docstrings/FastLinearRegressor.txt b/src/python/docs/docstrings/FastLinearRegressor.txt index 080008fa..9e7c5d88 100644 --- a/src/python/docs/docstrings/FastLinearRegressor.txt +++ b/src/python/docs/docstrings/FastLinearRegressor.txt @@ -1,7 +1,7 @@ """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear regression. .. remarks:: ``FastLinearRegressor`` is a trainer based on the Stochastic Dual diff --git a/src/python/docs/docstrings/FromKey.txt b/src/python/docs/docstrings/FromKey.txt index a61b7064..fd162550 100644 --- a/src/python/docs/docstrings/FromKey.txt +++ b/src/python/docs/docstrings/FromKey.txt @@ -1,7 +1,6 @@ """ - Text transforms that can be performed on data before training - a model. + Converts the key types back to their original values. .. remarks:: The ``FromKey`` transform converts a column of keys, generated using diff --git a/src/python/docs/docstrings/Goss.txt b/src/python/docs/docstrings/Goss.txt index 7ae86ec2..97265859 100644 --- a/src/python/docs/docstrings/Goss.txt +++ b/src/python/docs/docstrings/Goss.txt @@ -5,9 +5,9 @@ .. remarks:: Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling named gradient-based - sampling. For datasets with large sample size, GOSS has considerable + sampling. 
For datasets with large sample size, GOSS has considerable advantage in terms of - statistical and computational efficiency. + statistical and computational efficiency. diff --git a/src/python/docs/docstrings/Handler.txt b/src/python/docs/docstrings/Handler.txt index 01d767e8..4a639e1e 100644 --- a/src/python/docs/docstrings/Handler.txt +++ b/src/python/docs/docstrings/Handler.txt @@ -33,14 +33,13 @@ For more details see `Columns `_. :param replace_with: The method to use to replace NaN values. The - following choices are available. - - * Def: Replace with default value of that type, usually ``0``. If no - replace - method is specified, this is the default strategy. - * Mean: Replace NaN values with the mean of the values in that column. - * Min: Replace with minimum value in the column. - * Max: Replace with maximum value in the column. + following choices are available. + + * Def: Replace with default value of that type, usually ``0``. If no + replace method is specified, this is the default strategy. + * Mean: Replace NaN values with the mean of the values in that column. + * Min: Replace with minimum value in the column. + * Max: Replace with maximum value in the column. .. seealso:: :py:class:`Filter `, diff --git a/src/python/docs/docstrings/Loader.txt b/src/python/docs/docstrings/Loader.txt index ca290c1e..e94fb9e1 100644 --- a/src/python/docs/docstrings/Loader.txt +++ b/src/python/docs/docstrings/Loader.txt @@ -1,6 +1,6 @@ """ - Loaders image data. + Loads image data. .. remarks:: ``Loader`` loads images from paths. diff --git a/src/python/docs/docstrings/NGram.txt b/src/python/docs/docstrings/NGram.txt index e05c292a..e4d681db 100644 --- a/src/python/docs/docstrings/NGram.txt +++ b/src/python/docs/docstrings/NGram.txt @@ -1,6 +1,6 @@ """ - Extracts NGrams from text and convert them to vector using + Extracts NGrams from text and converts them to vector using dictionary. .. 
remarks:: diff --git a/src/python/docs/docstrings/NgramHash.txt b/src/python/docs/docstrings/NgramHash.txt index b7e34e8a..a1969901 100644 --- a/src/python/docs/docstrings/NgramHash.txt +++ b/src/python/docs/docstrings/NgramHash.txt @@ -1,6 +1,6 @@ """ - Extracts NGrams from text and convert them to vector using hashing + Extracts NGrams from text and converts them to vector using hashing trick. .. remarks:: diff --git a/src/python/docs/docstrings/PrefixColumnConcatenator.txt b/src/python/docs/docstrings/PrefixColumnConcatenator.txt new file mode 100644 index 00000000..aac3d116 --- /dev/null +++ b/src/python/docs/docstrings/PrefixColumnConcatenator.txt @@ -0,0 +1,44 @@ + """ + + Combines several columns into a single vector-valued column by prefix. + + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. + + :param columns: a dictionary of key-value pairs, where key is the output + column name and value is a list of input column names. + + * Only one key-value pair is allowed. + * Input column type: numeric or string. + * Output column type: + `Vector Type `_. + + The << operator can be used to set this value (see + `Column Operator `_) + + For example + * ColumnConcatenator(columns={'features': ['age', 'parity', + 'induced']}) + * ColumnConcatenator() << {'features': ['age', 'parity', + 'induced']} + + For more details see `Columns `_. + + .. seealso:: + :py:class:`ColumnDropper + `, + :py:class:`ColumnSelector + `. + + .. index:: transform, schema + + Example: + .. 
literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py + :language: python + """ diff --git a/src/python/docs/docstrings/Resizer.txt b/src/python/docs/docstrings/Resizer.txt index eb45128e..2bf9857f 100644 --- a/src/python/docs/docstrings/Resizer.txt +++ b/src/python/docs/docstrings/Resizer.txt @@ -1,15 +1,15 @@ """ - Resizers an image to a specified dimension using a specified + Resizes an image to a specified dimension using a specified resizing method. .. remarks:: - ``Resizer`` resizers an image to the specified height and width + ``Resizer`` resizes an image to the specified height and width using a specified resizing method. The input variables to this transforms must be images, typically the result of the ``Loader`` transform. - :param columns: a dictionary of key-value pairs, where key is the output + :param columns: A dictionary of key-value pairs, where key is the output column name and value is the input column name. * Multiple key-value pairs are allowed. diff --git a/src/python/docs/docstrings/ToKey.txt b/src/python/docs/docstrings/ToKey.txt index 2740561b..89a32047 100644 --- a/src/python/docs/docstrings/ToKey.txt +++ b/src/python/docs/docstrings/ToKey.txt @@ -1,7 +1,6 @@ """ - Text transforms that can be performed on data before training - a model. + Converts input values (words, numbers, etc.) to index in a dictionary. .. remarks:: The ``ToKey`` transform converts a column of text to key values diff --git a/src/python/nimbusml/_pipeline.py b/src/python/nimbusml/_pipeline.py index 71ee437d..8a2d88af 100644 --- a/src/python/nimbusml/_pipeline.py +++ b/src/python/nimbusml/_pipeline.py @@ -119,10 +119,10 @@ class Pipeline: for more details on how to select these. :param steps: the list of operator or (name, operator) tuples that - are chained in the appropriate order. + are chained in the appropriate order. :param model: the path to the model file (".zip") if want to load a - model directly from file (such as a trained model from ML.NET). 
+ model directly from file (such as a trained model from ML.NET). :param random_state: the integer used as the random seed. diff --git a/src/python/nimbusml/ensemble/booster/_dart.py b/src/python/nimbusml/ensemble/booster/_dart.py index 1f818cc3..ec7ec351 100644 --- a/src/python/nimbusml/ensemble/booster/_dart.py +++ b/src/python/nimbusml/ensemble/booster/_dart.py @@ -24,9 +24,9 @@ class Dart(core): `_ is an ensemble method of boosted regression trees. The Dropouts meet Multiple Additive Regression - Trees (DART) employs dropouts in MART and overcomes the issues of over- + Trees (DART) employs dropouts in MART and overcomes the issues of over- specialization of MART, - achiving better performance in many tasks. + achieving better performance in many tasks. **Reference** diff --git a/src/python/nimbusml/ensemble/booster/_goss.py b/src/python/nimbusml/ensemble/booster/_goss.py index 2cecacac..24368238 100644 --- a/src/python/nimbusml/ensemble/booster/_goss.py +++ b/src/python/nimbusml/ensemble/booster/_goss.py @@ -22,9 +22,9 @@ class Goss(core): .. remarks:: Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling named gradient-based - sampling. For datasets with large sample size, GOSS has considerable + sampling. For datasets with large sample size, GOSS has considerable advantage in terms of - statistical and computational efficiency. + statistical and computational efficiency. diff --git a/src/python/nimbusml/feature_extraction/image/_loader.py b/src/python/nimbusml/feature_extraction/image/_loader.py index c5bc6625..5921e104 100644 --- a/src/python/nimbusml/feature_extraction/image/_loader.py +++ b/src/python/nimbusml/feature_extraction/image/_loader.py @@ -20,7 +20,7 @@ class Loader(core, BaseTransform, TransformerMixin): """ - Loaders image data. + Loads image data. .. remarks:: ``Loader`` loads images from paths. 
diff --git a/src/python/nimbusml/feature_extraction/image/_resizer.py b/src/python/nimbusml/feature_extraction/image/_resizer.py index bd8a9008..6dcf1dd8 100644 --- a/src/python/nimbusml/feature_extraction/image/_resizer.py +++ b/src/python/nimbusml/feature_extraction/image/_resizer.py @@ -20,16 +20,16 @@ class Resizer(core, BaseTransform, TransformerMixin): """ - Resizers an image to a specified dimension using a specified + Resizes an image to a specified dimension using a specified resizing method. .. remarks:: - ``Resizer`` resizers an image to the specified height and width + ``Resizer`` resizes an image to the specified height and width using a specified resizing method. The input variables to this transforms must be images, typically the result of the ``Loader`` transform. - :param columns: a dictionary of key-value pairs, where key is the output + :param columns: A dictionary of key-value pairs, where key is the output column name and value is the input column name. * Multiple key-value pairs are allowed. diff --git a/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py b/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py index 0adb5bd1..4466d102 100644 --- a/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py +++ b/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py @@ -18,7 +18,7 @@ class Ngram(core): """ - Extracts NGrams from text and convert them to vector using + Extracts NGrams from text and converts them to vector using dictionary. .. 
remarks:: diff --git a/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py b/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py index 7f50d382..b6503426 100644 --- a/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py +++ b/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py @@ -18,7 +18,7 @@ class NgramHash(core): """ - Extracts NGrams from text and convert them to vector using hashing + Extracts NGrams from text and converts them to vector using hashing trick. .. remarks:: diff --git a/src/python/nimbusml/internal/core/ensemble/booster/_dart.py b/src/python/nimbusml/internal/core/ensemble/booster/_dart.py index dd4418d3..49297929 100644 --- a/src/python/nimbusml/internal/core/ensemble/booster/_dart.py +++ b/src/python/nimbusml/internal/core/ensemble/booster/_dart.py @@ -25,9 +25,9 @@ class Dart(Component): `_ is an ensemble method of boosted regression trees. The Dropouts meet Multiple Additive Regression - Trees (DART) employs dropouts in MART and overcomes the issues of over- + Trees (DART) employs dropouts in MART and overcomes the issues of over- specialization of MART, - achiving better performance in many tasks. + achieving better performance in many tasks. **Reference** diff --git a/src/python/nimbusml/internal/core/ensemble/booster/_goss.py b/src/python/nimbusml/internal/core/ensemble/booster/_goss.py index 694cb8bf..aa552afc 100644 --- a/src/python/nimbusml/internal/core/ensemble/booster/_goss.py +++ b/src/python/nimbusml/internal/core/ensemble/booster/_goss.py @@ -23,9 +23,9 @@ class Goss(Component): .. remarks:: Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling named gradient-based - sampling. For datasets with large sample size, GOSS has considerable + sampling. For datasets with large sample size, GOSS has considerable advantage in terms of - statistical and computational efficiency. + statistical and computational efficiency. 
diff --git a/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py b/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py index ad8c70c1..888afab4 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py +++ b/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py @@ -18,7 +18,7 @@ class Loader(BasePipelineItem, DefaultSignature): """ - Loaders image data. + Loads image data. .. remarks:: ``Loader`` loads images from paths. diff --git a/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py b/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py index 34ba1f39..819fb51c 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py +++ b/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py @@ -18,11 +18,11 @@ class Resizer(BasePipelineItem, DefaultSignature): """ - Resizers an image to a specified dimension using a specified + Resizes an image to a specified dimension using a specified resizing method. .. remarks:: - ``Resizer`` resizers an image to the specified height and width + ``Resizer`` resizes an image to the specified height and width using a specified resizing method. The input variables to this transforms must be images, typically the result of the ``Loader`` transform. diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py index 07fde941..a7292f9c 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py +++ b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py @@ -18,7 +18,7 @@ class Ngram(Component): """ - Extracts NGrams from text and convert them to vector using + Extracts NGrams from text and converts them to vector using dictionary. .. 
remarks:: diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py index cd08b4be..04cb7713 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py +++ b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py @@ -18,7 +18,7 @@ class NgramHash(Component): """ - Extracts NGrams from text and convert them to vector using hashing + Extracts NGrams from text and converts them to vector using hashing trick. .. remarks:: diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py index 3d43894a..f8346814 100644 --- a/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py @@ -23,7 +23,7 @@ class FastLinearBinaryClassifier( """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear binary classification. .. remarks:: ``FastLinearBinaryClassifier`` is a trainer based on the Stochastic diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py index b377eb11..4afcba87 100644 --- a/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py @@ -22,7 +22,8 @@ class FastLinearClassifier( DefaultSignatureWithRoles): """ - Train an SDCA multi class model + A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer for + multi class classification. .. 
remarks:: ``FastLinearClassifier`` is a trainer based on the Stochastic Dual diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py index f8a4b5ac..597e3dfb 100644 --- a/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py +++ b/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py @@ -23,7 +23,7 @@ class FastLinearRegressor( """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear regression. .. remarks:: ``FastLinearRegressor`` is a trainer based on the Stochastic Dual diff --git a/src/python/nimbusml/internal/core/preprocessing/_fromkey.py b/src/python/nimbusml/internal/core/preprocessing/_fromkey.py index bd5cfe10..ef7f8efb 100644 --- a/src/python/nimbusml/internal/core/preprocessing/_fromkey.py +++ b/src/python/nimbusml/internal/core/preprocessing/_fromkey.py @@ -19,8 +19,7 @@ class FromKey(BasePipelineItem, DefaultSignature): """ - Text transforms that can be performed on data before training - a model. + Converts the key types back to their original values. .. remarks:: The ``FromKey`` transform converts a column of keys, generated using diff --git a/src/python/nimbusml/internal/core/preprocessing/_tokey.py b/src/python/nimbusml/internal/core/preprocessing/_tokey.py index 55cd7200..b1295adf 100644 --- a/src/python/nimbusml/internal/core/preprocessing/_tokey.py +++ b/src/python/nimbusml/internal/core/preprocessing/_tokey.py @@ -19,8 +19,7 @@ class ToKey(BasePipelineItem, DefaultSignature): """ - Text transforms that can be performed on data before training - a model. + Converts input values (words, numbers, etc.) to index in a dictionary. .. 
remarks:: The ``ToKey`` transform converts a column of text to key values diff --git a/src/python/nimbusml/internal/core/preprocessing/missing_values/_handler.py b/src/python/nimbusml/internal/core/preprocessing/missing_values/_handler.py index 55f0ed01..3fd199aa 100644 --- a/src/python/nimbusml/internal/core/preprocessing/missing_values/_handler.py +++ b/src/python/nimbusml/internal/core/preprocessing/missing_values/_handler.py @@ -33,14 +33,13 @@ class Handler(BasePipelineItem, DefaultSignature): were imputed. This works for columns that have numeric type. :param replace_with: The method to use to replace NaN values. The - following choices are available. - - * Def: Replace with default value of that type, usually ``0``. If no - replace - method is specified, this is the default strategy. - * Mean: Replace NaN values with the mean of the values in that column. - * Min: Replace with minimum value in the column. - * Max: Replace with maximum value in the column. + following choices are available. + + * Def: Replace with default value of that type, usually ``0``. If no + replace method is specified, this is the default strategy. + * Mean: Replace NaN values with the mean of the values in that column. + * Min: Replace with minimum value in the column. + * Max: Replace with maximum value in the column. :param impute_by_slot: Whether to impute values by slot. diff --git a/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py b/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py index d202e947..003e909f 100644 --- a/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py +++ b/src/python/nimbusml/internal/core/preprocessing/schema/_prefixcolumnconcatenator.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ PrefixColumnConcatenator """ @@ -15,10 +16,12 @@ from ...base_pipeline_item import BasePipelineItem, DefaultSignature -class PrefixColumnConcatenator(BasePipelineItem, DefaultSignature): +class PrefixColumnConcatenator( + BasePipelineItem, + DefaultSignature): """ - Combines several columns into a single vector-valued column by prefix + Combines several columns into a single vector-valued column by prefix. .. remarks:: ``PrefixColumnConcatenator`` creates a single vector-valued column from diff --git a/src/python/nimbusml/linear_model/_fastlinearbinaryclassifier.py b/src/python/nimbusml/linear_model/_fastlinearbinaryclassifier.py index 4793ce51..d3ce34d6 100644 --- a/src/python/nimbusml/linear_model/_fastlinearbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/_fastlinearbinaryclassifier.py @@ -23,7 +23,7 @@ class FastLinearBinaryClassifier( """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear binary classification. .. remarks:: ``FastLinearBinaryClassifier`` is a trainer based on the Stochastic diff --git a/src/python/nimbusml/linear_model/_fastlinearclassifier.py b/src/python/nimbusml/linear_model/_fastlinearclassifier.py index 83849084..9cee4125 100644 --- a/src/python/nimbusml/linear_model/_fastlinearclassifier.py +++ b/src/python/nimbusml/linear_model/_fastlinearclassifier.py @@ -21,7 +21,8 @@ class FastLinearClassifier(core, BasePredictor, ClassifierMixin): """ - Train an SDCA multi class model + A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer for + multi class classification. .. 
remarks:: ``FastLinearClassifier`` is a trainer based on the Stochastic Dual diff --git a/src/python/nimbusml/linear_model/_fastlinearregressor.py b/src/python/nimbusml/linear_model/_fastlinearregressor.py index 00952630..eb43dfc3 100644 --- a/src/python/nimbusml/linear_model/_fastlinearregressor.py +++ b/src/python/nimbusml/linear_model/_fastlinearregressor.py @@ -22,7 +22,7 @@ class FastLinearRegressor(core, BasePredictor, RegressorMixin): """ A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer - for linear binary classification and regression. + for linear regression. .. remarks:: ``FastLinearRegressor`` is a trainer based on the Stochastic Dual diff --git a/src/python/nimbusml/model_selection/_cv.py b/src/python/nimbusml/model_selection/_cv.py index 79a5def4..44c4275a 100644 --- a/src/python/nimbusml/model_selection/_cv.py +++ b/src/python/nimbusml/model_selection/_cv.py @@ -96,7 +96,7 @@ class CV: the average of each metric on all models. :param pipeline: Pipeline object or a list of pipeline steps that's - used for cross validation + used for cross validation ''' fold_column_name = 'Fold' diff --git a/src/python/nimbusml/preprocessing/_fromkey.py b/src/python/nimbusml/preprocessing/_fromkey.py index 29319966..86d117dd 100644 --- a/src/python/nimbusml/preprocessing/_fromkey.py +++ b/src/python/nimbusml/preprocessing/_fromkey.py @@ -20,8 +20,7 @@ class FromKey(core, BaseTransform, TransformerMixin): """ - Text transforms that can be performed on data before training - a model. + Converts the key types back to their original values. .. 
remarks:: The ``FromKey`` transform converts a column of keys, generated using diff --git a/src/python/nimbusml/preprocessing/_tokey.py b/src/python/nimbusml/preprocessing/_tokey.py index 3bd95c43..315d57cf 100644 --- a/src/python/nimbusml/preprocessing/_tokey.py +++ b/src/python/nimbusml/preprocessing/_tokey.py @@ -20,8 +20,7 @@ class ToKey(core, BaseTransform, TransformerMixin): """ - Text transforms that can be performed on data before training - a model. + Converts input values (words, numbers, etc.) to index in a dictionary. .. remarks:: The ``ToKey`` transform converts a column of text to key values diff --git a/src/python/nimbusml/preprocessing/missing_values/_handler.py b/src/python/nimbusml/preprocessing/missing_values/_handler.py index 095ddb36..232c284a 100644 --- a/src/python/nimbusml/preprocessing/missing_values/_handler.py +++ b/src/python/nimbusml/preprocessing/missing_values/_handler.py @@ -54,14 +54,13 @@ class Handler(core, BaseTransform, TransformerMixin): For more details see `Columns `_. :param replace_with: The method to use to replace NaN values. The - following choices are available. - - * Def: Replace with default value of that type, usually ``0``. If no - replace - method is specified, this is the default strategy. - * Mean: Replace NaN values with the mean of the values in that column. - * Min: Replace with minimum value in the column. - * Max: Replace with maximum value in the column. + following choices are available. + + * Def: Replace with default value of that type, usually ``0``. If no + replace method is specified, this is the default strategy. + * Mean: Replace NaN values with the mean of the values in that column. + * Min: Replace with minimum value in the column. + * Max: Replace with maximum value in the column. :param impute_by_slot: Whether to impute values by slot. 
diff --git a/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py b/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py index d526f5cd..c5b89adc 100644 --- a/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py +++ b/src/python/nimbusml/preprocessing/schema/_prefixcolumnconcatenator.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ PrefixColumnConcatenator """ diff --git a/src/python/tools/code_fixer.py b/src/python/tools/code_fixer.py index 21b6d1f4..3aa233ac 100644 --- a/src/python/tools/code_fixer.py +++ b/src/python/tools/code_fixer.py @@ -247,12 +247,18 @@ def fix_code(class_name, filename): all_args['output_for_sub_graph'] = {'Model' : \ all_args['predictor_model']}""" +prefixcolumnconcatenator_1 = "output_columns = input_columns" +prefixcolumnconcatenator_1_correct = """raise ValueError( + "'None' output passed when it cannot be none.")""" + signature_fixes_core = { 'NGramFeaturizer': (textTransform_1, textTransform_1_correct), 'ColumnConcatenator': [(concatColumns_1, concatColumns_1_correct)], 'ColumnSelector': [(columnselector_1, columnselector_1_correct)], 'OneVsRestClassifier': [ (onevsrestclassifier_1, onevsrestclassifier_1_correct)], + 'PrefixColumnConcatenator': (prefixcolumnconcatenator_1, + prefixcolumnconcatenator_1_correct) } diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json index 68ab2fa5..a8cd262e 100644 --- a/src/python/tools/manifest_diff.json +++ b/src/python/tools/manifest_diff.json @@ -785,6 +785,12 @@ "NewName": "TypeConverter", "Module": "preprocessing.schema", "Type": "Transform" + }, + { + "Name": "Transforms.PrefixColumnConcatenator", + "NewName": "PrefixColumnConcatenator", + "Module": "preprocessing.schema", + "Type": 
"Transform" } ], "Components": [ diff --git a/src/python/tools/temp_docs_updater.py b/src/python/tools/temp_docs_updater.py index ccb92b6b..3915d19e 100644 --- a/src/python/tools/temp_docs_updater.py +++ b/src/python/tools/temp_docs_updater.py @@ -13,7 +13,7 @@ from code_fixer import run_autopep -description = """ +description = """ This module helps with merging the changes from the master branch in to the temp/docs branch. Here are the steps it takes: