diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 51c50ea2..62c630ee 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -279,7 +279,7 @@ def nodes(self): @property def last_node(self): - if len(self.steps) <= 0: + if not self.steps: raise TypeError("No steps given.") last_step = self.steps[-1] return last_step if not isinstance(last_step, tuple) else \ @@ -2002,7 +2002,7 @@ def _predict(self, X, y=None, return out_data, out_metrics def _extract_classes(self, y): - if ((len(self.steps) > 0) and + if (self.steps and (self.last_node.type in ['classifier', 'anomaly']) and (y is not None) and (not isinstance(y, (str, tuple)))): @@ -2015,7 +2015,10 @@ def _extract_classes(self, y): self._add_classes(unique_classes) def _extract_classes_from_headers(self, headers): - if hasattr(self.last_node, 'classes_'): + # Note: _classes cannot be added to the Pipeline unless + # it already exists in the predictor node because the + # dtype is required to set the correct type. 
+ if self.steps and hasattr(self.last_node, 'classes_'): classes = [x.replace('Score.', '') for x in headers] classes = np.array(classes).astype(self.last_node.classes_.dtype) self._add_classes(classes) @@ -2024,7 +2027,9 @@ def _add_classes(self, classes): # Create classes_ attribute similar to scikit # Add both to pipeline and ending classifier self.classes_ = classes - self.last_node.classes_ = classes + + if self.steps: + self.last_node.classes_ = classes @trace def predict(self, X, verbose=0, as_binary_data_stream=False, **params): diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py index 2eb77763..f1fc2ec7 100644 --- a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py @@ -11,7 +11,10 @@ from nimbusml.datasets import get_dataset from nimbusml.feature_extraction.categorical import OneHotVectorizer from nimbusml.linear_model import LogisticRegressionBinaryClassifier, OnlineGradientDescentRegressor +from nimbusml.multiclass import OneVsRestClassifier from nimbusml.preprocessing.filter import RangeFilter +from nimbusml.preprocessing import DatasetTransformer +from nimbusml.preprocessing.schema import PrefixColumnConcatenator seed = 0 @@ -429,6 +432,30 @@ def test_combined_models_support_predict_proba(self): self.assertTrue(np.array_equal(result_1, result_2)) + def test_combined_models_support_predict_proba_with_more_than_2_classes(self): + path = get_dataset('infert').as_filepath() + data = FileDataStream.read_csv(path) + + featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})]) + featurization_pipeline.fit(data) + featurized_data = featurization_pipeline.transform(data) + + feature_cols = ['education', 'age'] + training_pipeline = Pipeline([DatasetTransformer(featurization_pipeline.model), OneVsRestClassifier(LogisticRegressionBinaryClassifier(), 
feature=feature_cols, label='induced')]) + training_pipeline.fit(data, output_predictor_model=True) + + concat_pipeline = Pipeline([PrefixColumnConcatenator({'education': 'education.'})]) + concat_pipeline.fit(featurized_data) + + predictor_pipeline = Pipeline() + predictor_pipeline.load_model(training_pipeline.predictor_model) + + concat_and_predictor_pipeline = Pipeline.combine_models(concat_pipeline, predictor_pipeline) + + result = concat_and_predictor_pipeline.predict_proba(featurized_data) + self.assertEqual(result.shape[1], 3) + + def test_combined_models_support_decision_function(self): path = get_dataset('infert').as_filepath()