diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index c9c73556..fb4a8e03 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -67,9 +67,7 @@ - - Code - + diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 692e1dea..c1143160 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -19,6 +19,7 @@ from scipy.sparse import csr_matrix from sklearn.utils.validation import check_X_y, check_array from sklearn.utils.multiclass import unique_labels +from zipfile import ZipFile from .internal.core.base_pipeline_item import BasePipelineItem from .internal.entrypoints.data_customtextloader import \ @@ -40,6 +41,8 @@ from .internal.entrypoints.models_summarizer import models_summarizer from .internal.entrypoints.transforms_datasetscorer import \ transforms_datasetscorer +from .internal.entrypoints.transforms_datasettransformscorer import \ + transforms_datasettransformscorer from .internal.entrypoints.transforms_featurecombiner import \ transforms_featurecombiner from .internal.entrypoints.transforms_featurecontributioncalculationtransformer import \ @@ -1816,22 +1819,44 @@ def _predict(self, X, y=None, isinstance(X, DataFrame) and isinstance(y, (str, tuple))): y = y_temp + is_transformer_chain = False + with ZipFile(self.model) as model_zip: + is_transformer_chain = any('TransformerChain' in item + for item in model_zip.namelist()) + all_nodes = [] - inputs = dict([('data', ''), ('predictor_model', self.model)]) - if isinstance(X, FileDataStream): - importtext_node = data_customtextloader( - input_file="$file", + if is_transformer_chain: + inputs = dict([('data', ''), ('transform_model', self.model)]) + if isinstance(X, FileDataStream): + importtext_node = data_customtextloader( + input_file="$file", + data="$data", + custom_schema=schema.to_string( + add_sep=True)) + all_nodes = [importtext_node] + inputs = dict([('file', ''), ('transform_model', self.model)]) + + score_node = transforms_datasettransformscorer( data="$data", - custom_schema=schema.to_string( - add_sep=True)) - all_nodes = [importtext_node] - inputs = dict([('file', ''), ('predictor_model', self.model)]) - - score_node = transforms_datasetscorer( - data="$data", - predictor_model="$predictor_model", - scored_data="$scoredVectorData") - all_nodes.extend([score_node]) + transform_model="$transform_model", + scored_data="$scoredVectorData") + all_nodes.extend([score_node]) + else: + inputs = dict([('data', ''), ('predictor_model', self.model)]) + if isinstance(X, FileDataStream): + importtext_node = data_customtextloader( + input_file="$file", + data="$data", + custom_schema=schema.to_string( + add_sep=True)) + all_nodes = [importtext_node] + inputs = dict([('file', ''), ('predictor_model', self.model)]) + + score_node = transforms_datasetscorer( + data="$data", + predictor_model="$predictor_model", + scored_data="$scoredVectorData") + all_nodes.extend([score_node]) if (evaltype in ['binary', 'multiclass']) or \ (hasattr(self, 'steps') @@ -1889,6 +1914,10 @@ def _predict(self, X, y=None, self._run_time = time.time() - start_time raise e + if is_transformer_chain: + out_data['PredictedLabel'] = out_data['PredictedLabel']*1 + + if y is not None: # We need to fix the schema for ranking metrics if evaltype == 'ranking':