microsoft · najeeb-kazmi · Dec 2, 2019 · Nov 22, 2019 · ganik · Nov 25, 2019
diff --git a/.gitignore b/.gitignore
@@ -348,9 +348,3 @@ data.csv
 data.txt
 
 /build/TestCoverageReport
-
-# The folder generated by make_yaml.bat
-*_build
-*mymodeluci.zip
-build/sphinxmdoutput-0.2.4.1-py3-none-any.whl
-*build
diff --git a/README.md b/README.md
@@ -88,7 +88,7 @@ To build `nimbusml` from source please visit our [developer guide](docs/develope
 
 ## Contributing
 
-The contributions guide can be found [here](CONTRIBUTING.md). Given the experimental nature of this project, support will be provided on a best-effort basis. We suggest opening an issue for discussion before starting a PR with big changes.
+The contributions guide can be found [here](CONTRIBUTING.md). 
 
 ## Support
 

diff --git a/src/python/docs/docstrings/Dart.txt b/src/python/docs/docstrings/Dart.txt
@@ -7,9 +7,9 @@
         <https://arxiv.org/abs/1505.01866>`_ is an
         ensemble method of boosted regression trees. The Dropouts meet
         Multiple Additive Regression
-	Trees (DART) employs dropouts in MART and overcomes the issues of over-
+        Trees (DART) employs dropouts in MART and overcomes the issues of over-
         specialization of MART,
-	achiving better performance in many tasks.
+        achieving better performance in many tasks.
 
 
         **Reference**

diff --git a/src/python/docs/docstrings/FastLinearBinaryClassifier.txt b/src/python/docs/docstrings/FastLinearBinaryClassifier.txt
@@ -1,7 +1,7 @@
     """
 
     A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer
-    for linear binary classification and regression.
+    for linear binary classification.
 
     .. remarks::
         ``FastLinearBinaryClassifier`` is a trainer based on the Stochastic

diff --git a/src/python/docs/docstrings/FastLinearClassifier.txt b/src/python/docs/docstrings/FastLinearClassifier.txt
@@ -1,6 +1,7 @@
     """
 
-    Train an SDCA multi class model
+    A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer for
+    multi class classification.
 
     .. remarks::
         ``FastLinearClassifier`` is a trainer based on the Stochastic Dual

diff --git a/src/python/docs/docstrings/FastLinearRegressor.txt b/src/python/docs/docstrings/FastLinearRegressor.txt
@@ -1,7 +1,7 @@
     """
 
     A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer
-    for linear binary classification and regression.
+    for linear regression.
 
     .. remarks::
         ``FastLinearRegressor`` is a trainer based on the Stochastic Dual

diff --git a/src/python/docs/docstrings/FromKey.txt b/src/python/docs/docstrings/FromKey.txt
@@ -1,7 +1,6 @@
     """
 
-    Text transforms that can be performed on data before training
-    a model.
+    Converts the key types back to their original values.
 
     .. remarks::
         The ``FromKey`` transform converts a column of keys, generated using

diff --git a/src/python/docs/docstrings/Goss.txt b/src/python/docs/docstrings/Goss.txt
@@ -5,9 +5,9 @@
     .. remarks::
         Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling
         named gradient-based
-	sampling. For datasets with large sample size, GOSS has considerable
+        sampling. For datasets with large sample size, GOSS has considerable
         advantage in terms of
-	statistical and computational efficiency.	
+        statistical and computational efficiency.
 
 
 

diff --git a/src/python/docs/docstrings/Handler.txt b/src/python/docs/docstrings/Handler.txt
@@ -33,14 +33,13 @@
         For more details see `Columns </nimbusml/concepts/columns>`_.
 
     :param replace_with: The method to use to replace NaN values. The
-    following choices are available.
-
-       * Def: Replace with default value of that type, usually ``0``. If no
-        replace
-       method is specified, this is the default strategy.
-       * Mean: Replace NaN values with the mean of the values in that column.
-       * Min: Replace with minimum value in the column.
-       * Max: Replace with maximum value in the column.
+        following choices are available.
+
+        * Def: Replace with default value of that type, usually ``0``. If no
+          replace method is specified, this is the default strategy.
+        * Mean: Replace NaN values with the mean of the values in that column.
+        * Min: Replace with minimum value in the column.
+        * Max: Replace with maximum value in the column.
 
     .. seealso::
         :py:class:`Filter <nimbusml.preprocessing.missing_values.Filter>`,

diff --git a/src/python/docs/docstrings/Loader.txt b/src/python/docs/docstrings/Loader.txt
@@ -1,6 +1,6 @@
     """
 
-    Loaders image data.
+    Loads image data.
 
     .. remarks::
         ``Loader`` loads images from paths.

diff --git a/src/python/docs/docstrings/NGram.txt b/src/python/docs/docstrings/NGram.txt
@@ -1,6 +1,6 @@
     """
 
-    Extracts NGrams from text and convert them to vector using
+    Extracts NGrams from text and converts them to a vector using a
     dictionary.
 
     .. remarks::

diff --git a/src/python/docs/docstrings/NgramHash.txt b/src/python/docs/docstrings/NgramHash.txt
@@ -1,6 +1,6 @@
     """
 
-    Extracts NGrams from text and convert them to vector using hashing
+    Extracts NGrams from text and converts them to a vector using the hashing
     trick.
 
     .. remarks::

diff --git a/src/python/docs/docstrings/PrefixColumnConcatenator.txt b/src/python/docs/docstrings/PrefixColumnConcatenator.txt
@@ -0,0 +1,44 @@
+    """
+
+    Combines several columns into a single vector-valued column by prefix.
+
+    .. remarks::
+        ``PrefixColumnConcatenator`` creates a single vector-valued column from
+        multiple
+        columns. It can be performed on data before training a model. The
+        concatenation
+        can significantly speed up the processing of data when the number of
+        columns
+        is as large as hundreds to thousands.
+
+    :param columns: a dictionary of key-value pairs, where key is the output
+        column name and value is a list of input column names.
+
+         * Only one key-value pair is allowed.
+         * Input column type: numeric or string.
+         * Output column type:
+        `Vector Type </nimbusml/concepts/types#vectortype-column>`_.
+
+        The << operator can be used to set this value (see
+        `Column Operator </nimbusml/concepts/columns>`_)
+
+        For example
+         * ColumnConcatenator(columns={'features': ['age', 'parity',
+        'induced']})
+         * ColumnConcatenator() << {'features': ['age', 'parity',
+        'induced']}
+
+        For more details see `Columns </nimbusml/concepts/columns>`_.
+
+    .. seealso::
+        :py:class:`ColumnDropper
+        <nimbusml.preprocessing.schema.ColumnDropper>`,
+        :py:class:`ColumnSelector
+        <nimbusml.preprocessing.schema.ColumnSelector>`.
+
+    .. index:: transform, schema
+
+    Example:
+       .. literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py
+              :language: python
+    """
diff --git a/src/python/docs/docstrings/Resizer.txt b/src/python/docs/docstrings/Resizer.txt
@@ -1,15 +1,15 @@
     """
 
-    Resizers an image to a specified dimension using a specified
+    Resizes an image to a specified dimension using a specified
     resizing method.
 
     .. remarks::
-        ``Resizer`` resizers an image to the specified height and width
+        ``Resizer`` resizes an image to the specified height and width
         using a specified resizing method. The input variables to this
         transforms must
         be images, typically the result of the ``Loader`` transform.
 
-    :param columns: a dictionary of key-value pairs, where key is the output
+    :param columns: A dictionary of key-value pairs, where key is the output
         column name and value is the input column name.
 
         * Multiple key-value pairs are allowed.

diff --git a/src/python/docs/docstrings/ToKey.txt b/src/python/docs/docstrings/ToKey.txt
@@ -1,7 +1,6 @@
     """
 
-    Text transforms that can be performed on data before training
-    a model.
+    Converts input values (words, numbers, etc.) to indices in a dictionary.
 
     .. remarks::
         The ``ToKey`` transform converts a column of text to key values

diff --git a/src/python/nimbusml/_pipeline.py b/src/python/nimbusml/_pipeline.py
@@ -119,10 +119,10 @@ class Pipeline:
         for more details on how to select these.
 
     :param steps: the list of operator or (name, operator) tuples  that
-    are chained in the appropriate order.
+        are chained in the appropriate order.
 
     :param model: the path to the model file (".zip") if want to load a
-    model directly from file (such as a trained model from ML.NET).
+        model directly from file (such as a trained model from ML.NET).
 
     :param random_state: the integer used as the random seed.
 

diff --git a/src/python/nimbusml/ensemble/booster/_dart.py b/src/python/nimbusml/ensemble/booster/_dart.py
@@ -24,9 +24,9 @@ class Dart(core):
         <https://arxiv.org/abs/1505.01866>`_ is an
         ensemble method of boosted regression trees. The Dropouts meet
         Multiple Additive Regression
-    Trees (DART) employs dropouts in MART and overcomes the issues of over-
+        Trees (DART) employs dropouts in MART and overcomes the issues of over-
         specialization of MART,
-    achiving better performance in many tasks.
+        achieving better performance in many tasks.
 
 
         **Reference**

diff --git a/src/python/nimbusml/ensemble/booster/_goss.py b/src/python/nimbusml/ensemble/booster/_goss.py
@@ -22,9 +22,9 @@ class Goss(core):
     .. remarks::
         Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling
         named gradient-based
-    sampling. For datasets with large sample size, GOSS has considerable
+        sampling. For datasets with large sample size, GOSS has considerable
         advantage in terms of
-    statistical and computational efficiency.
+        statistical and computational efficiency.
 
 
 

diff --git a/src/python/nimbusml/feature_extraction/image/_loader.py b/src/python/nimbusml/feature_extraction/image/_loader.py
@@ -20,7 +20,7 @@
 class Loader(core, BaseTransform, TransformerMixin):
     """
 
-    Loaders image data.
+    Loads image data.
 
     .. remarks::
         ``Loader`` loads images from paths.

diff --git a/src/python/nimbusml/feature_extraction/image/_resizer.py b/src/python/nimbusml/feature_extraction/image/_resizer.py
@@ -20,16 +20,16 @@
 class Resizer(core, BaseTransform, TransformerMixin):
     """
 
-    Resizers an image to a specified dimension using a specified
+    Resizes an image to a specified dimension using a specified
     resizing method.
 
     .. remarks::
-        ``Resizer`` resizers an image to the specified height and width
+        ``Resizer`` resizes an image to the specified height and width
         using a specified resizing method. The input variables to this
         transforms must
         be images, typically the result of the ``Loader`` transform.
 
-    :param columns: a dictionary of key-value pairs, where key is the output
+    :param columns: A dictionary of key-value pairs, where key is the output
         column name and value is the input column name.
 
         * Multiple key-value pairs are allowed.

diff --git a/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py b/src/python/nimbusml/feature_extraction/text/extractor/_ngram.py
@@ -18,7 +18,7 @@
 class Ngram(core):
     """
 
-    Extracts NGrams from text and convert them to vector using
+    Extracts NGrams from text and converts them to a vector using a
     dictionary.
 
     .. remarks::

diff --git a/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py b/src/python/nimbusml/feature_extraction/text/extractor/_ngramhash.py
@@ -18,7 +18,7 @@
 class NgramHash(core):
     """
 
-    Extracts NGrams from text and convert them to vector using hashing
+    Extracts NGrams from text and converts them to a vector using the hashing
     trick.
 
     .. remarks::

diff --git a/src/python/nimbusml/internal/core/ensemble/booster/_dart.py b/src/python/nimbusml/internal/core/ensemble/booster/_dart.py
@@ -25,9 +25,9 @@ class Dart(Component):
         <https://arxiv.org/abs/1505.01866>`_ is an
         ensemble method of boosted regression trees. The Dropouts meet
         Multiple Additive Regression
-    Trees (DART) employs dropouts in MART and overcomes the issues of over-
+        Trees (DART) employs dropouts in MART and overcomes the issues of over-
         specialization of MART,
-    achiving better performance in many tasks.
+        achieving better performance in many tasks.
 
 
         **Reference**

diff --git a/src/python/nimbusml/internal/core/ensemble/booster/_goss.py b/src/python/nimbusml/internal/core/ensemble/booster/_goss.py
@@ -23,9 +23,9 @@ class Goss(Component):
     .. remarks::
         Gradient-based One-Side Sampling (GOSS) employs an adaptive sampling
         named gradient-based
-    sampling. For datasets with large sample size, GOSS has considerable
+        sampling. For datasets with large sample size, GOSS has considerable
         advantage in terms of
-    statistical and computational efficiency.
+        statistical and computational efficiency.
 
 
 

diff --git a/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py b/src/python/nimbusml/internal/core/feature_extraction/image/_loader.py
@@ -18,7 +18,7 @@
 class Loader(BasePipelineItem, DefaultSignature):
     """
 
-    Loaders image data.
+    Loads image data.
 
     .. remarks::
         ``Loader`` loads images from paths.

diff --git a/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py b/src/python/nimbusml/internal/core/feature_extraction/image/_resizer.py
@@ -18,11 +18,11 @@
 class Resizer(BasePipelineItem, DefaultSignature):
     """
 
-    Resizers an image to a specified dimension using a specified
+    Resizes an image to a specified dimension using a specified
     resizing method.
 
     .. remarks::
-        ``Resizer`` resizers an image to the specified height and width
+        ``Resizer`` resizes an image to the specified height and width
         using a specified resizing method. The input variables to this
         transforms must
         be images, typically the result of the ``Loader`` transform.

diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngram.py
@@ -18,7 +18,7 @@
 class Ngram(Component):
     """
 
-    Extracts NGrams from text and convert them to vector using
+    Extracts NGrams from text and converts them to a vector using a
     dictionary.
 
     .. remarks::

diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py b/src/python/nimbusml/internal/core/feature_extraction/text/extractor/_ngramhash.py
@@ -18,7 +18,7 @@
 class NgramHash(Component):
     """
 
-    Extracts NGrams from text and convert them to vector using hashing
+    Extracts NGrams from text and converts them to a vector using the hashing
     trick.
 
     .. remarks::

diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearbinaryclassifier.py
@@ -23,7 +23,7 @@ class FastLinearBinaryClassifier(
     """
 
     A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer
-    for linear binary classification and regression.
+    for linear binary classification.
 
     .. remarks::
         ``FastLinearBinaryClassifier`` is a trainer based on the Stochastic

diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearclassifier.py
@@ -22,7 +22,8 @@ class FastLinearClassifier(
         DefaultSignatureWithRoles):
     """
 
-    Train an SDCA multi class model
+    A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer for
+    multi class classification.
 
     .. remarks::
         ``FastLinearClassifier`` is a trainer based on the Stochastic Dual

diff --git a/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py b/src/python/nimbusml/internal/core/linear_model/_fastlinearregressor.py
@@ -23,7 +23,7 @@ class FastLinearRegressor(
     """
 
     A Stochastic Dual Coordinate Ascent (SDCA) optimization trainer
-    for linear binary classification and regression.
+    for linear regression.
 
     .. remarks::
         ``FastLinearRegressor`` is a trainer based on the Stochastic Dual

diff --git a/src/python/nimbusml/internal/core/preprocessing/_fromkey.py b/src/python/nimbusml/internal/core/preprocessing/_fromkey.py
@@ -19,8 +19,7 @@
 class FromKey(BasePipelineItem, DefaultSignature):
     """
 
-    Text transforms that can be performed on data before training
-    a model.
+    Converts the key types back to their original values.
 
     .. remarks::
         The ``FromKey`` transform converts a column of keys, generated using

diff --git a/src/python/nimbusml/internal/core/preprocessing/_tokey.py b/src/python/nimbusml/internal/core/preprocessing/_tokey.py
@@ -19,8 +19,7 @@
 class ToKey(BasePipelineItem, DefaultSignature):
     """
 
-    Text transforms that can be performed on data before training
-    a model.
+    Converts input values (words, numbers, etc.) to indices in a dictionary.
 
     .. remarks::
         The ``ToKey`` transform converts a column of text to key values