13 changes: 9 additions & 4 deletions src/python/nimbusml/pipeline.py
@@ -279,7 +279,7 @@ def nodes(self):
 
     @property
    def last_node(self):
-        if len(self.steps) <= 0:
+        if not self.steps:
            raise TypeError("No steps given.")
        last_step = self.steps[-1]
        return last_step if not isinstance(last_step, tuple) else \
@@ -2002,7 +2002,7 @@ def _predict(self, X, y=None,
        return out_data, out_metrics
 
    def _extract_classes(self, y):
-        if ((len(self.steps) > 0) and
+        if (self.steps and
                (self.last_node.type in ['classifier', 'anomaly']) and
                (y is not None) and
                (not isinstance(y, (str, tuple)))):
@@ -2015,7 +2015,10 @@ def _extract_classes(self, y):
            self._add_classes(unique_classes)
 
    def _extract_classes_from_headers(self, headers):
-        if hasattr(self.last_node, 'classes_'):
+        # Note: classes_ cannot be added to the Pipeline unless
+        # it already exists in the predictor node because the
+        # dtype is required to set the correct type.
+        if self.steps and hasattr(self.last_node, 'classes_'):
            classes = [x.replace('Score.', '') for x in headers]
            classes = np.array(classes).astype(self.last_node.classes_.dtype)
            self._add_classes(classes)
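
The comment added above is the key constraint: the Score.* column headers are plain strings, so the labels recovered from them have to be cast back to the dtype of the classes_ attribute that already exists on the predictor node. A small stand-alone sketch of that cast, with made-up header names and label values:

# Illustrative only: the header names and label values here are made up.
import numpy as np

headers = ['Score.0', 'Score.1', 'Score.2']       # one score column per class
existing_classes = np.array([0, 1, 2])            # classes_ already on the predictor node

labels = [h.replace('Score.', '') for h in headers]         # ['0', '1', '2'] as strings
labels = np.array(labels).astype(existing_classes.dtype)    # cast back to the label dtype
print(labels)                                                # [0 1 2], integer dtype
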
@@ -2024,7 +2027,9 @@ def _add_classes(self, classes):
        # Create classes_ attribute similar to scikit
        # Add both to pipeline and ending classifier
        self.classes_ = classes
-        self.last_node.classes_ = classes
+
+        if self.steps:
+            self.last_node.classes_ = classes
 
    @trace
    def predict(self, X, verbose=0, as_binary_data_stream=False, **params):
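
All of the guards above cover the same situation, which the new test below constructs explicitly: a Pipeline whose steps list is empty because its model comes from load_model() rather than from fitting a list of steps. A minimal sketch of the failure mode, assuming only that nimbusml is installed and that Pipeline is importable from the package root:

# Minimal sketch: a Pipeline created without steps (for example, one that
# will only ever call load_model()) has nothing in self.steps, so any
# unguarded access to last_node raises the TypeError from the first hunk.
from nimbusml import Pipeline

empty_pipeline = Pipeline()          # no steps supplied
try:
    _ = empty_pipeline.last_node     # property guarded by "if not self.steps"
except TypeError as err:
    print(err)                       # "No steps given."

With the new checks in _extract_classes, _extract_classes_from_headers and _add_classes, a pipeline without steps skips the last-node bookkeeping instead of raising, which is the path the new test exercises through load_model() and Pipeline.combine_models().
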
27 changes: 27 additions & 0 deletions src/python/nimbusml/tests/pipeline/test_pipeline_combining.py
@@ -11,7 +11,10 @@
 from nimbusml.datasets import get_dataset
 from nimbusml.feature_extraction.categorical import OneHotVectorizer
 from nimbusml.linear_model import LogisticRegressionBinaryClassifier, OnlineGradientDescentRegressor
+from nimbusml.multiclass import OneVsRestClassifier
 from nimbusml.preprocessing.filter import RangeFilter
+from nimbusml.preprocessing import DatasetTransformer
+from nimbusml.preprocessing.schema import PrefixColumnConcatenator
 
 seed = 0
 
@@ -429,6 +432,30 @@ def test_combined_models_support_predict_proba(self):
        self.assertTrue(np.array_equal(result_1, result_2))
 
 
+    def test_combined_models_support_predict_proba_with_more_than_2_classes(self):
+        path = get_dataset('infert').as_filepath()
+        data = FileDataStream.read_csv(path)
+
+        featurization_pipeline = Pipeline([OneHotVectorizer(columns={'education': 'education'})])
+        featurization_pipeline.fit(data)
+        featurized_data = featurization_pipeline.transform(data)
+
+        feature_cols = ['education', 'age']
+        training_pipeline = Pipeline([DatasetTransformer(featurization_pipeline.model), OneVsRestClassifier(LogisticRegressionBinaryClassifier(), feature=feature_cols, label='induced')])
+        training_pipeline.fit(data, output_predictor_model=True)
+
+        concat_pipeline = Pipeline([PrefixColumnConcatenator({'education': 'education.'})])
+        concat_pipeline.fit(featurized_data)
+
+        predictor_pipeline = Pipeline()
+        predictor_pipeline.load_model(training_pipeline.predictor_model)
+
+        concat_and_predictor_pipeline = Pipeline.combine_models(concat_pipeline, predictor_pipeline)
+
+        result = concat_and_predictor_pipeline.predict_proba(featurized_data)
+        self.assertEqual(result.shape[1], 3)
+
+
     def test_combined_models_support_decision_function(self):
        path = get_dataset('infert').as_filepath()
 
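
As context for the final assertion: the induced column in the infert dataset takes three distinct values, so a multiclass predict_proba result should carry one probability column per class. A toy illustration of that shape check, with fabricated numbers:

# Toy illustration with made-up probabilities, only to show what
# result.shape[1] == 3 means: one probability column per class.
import numpy as np

toy_result = np.array([[0.70, 0.20, 0.10],
                       [0.15, 0.60, 0.25]])   # two example rows
assert toy_result.shape[1] == 3               # three classes, three columns
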