diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj
index c9c73556..fb4a8e03 100644
--- a/src/python/nimbusml.pyproj
+++ b/src/python/nimbusml.pyproj
@@ -67,9 +67,7 @@
-
- Code
-
+
diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py
index 692e1dea..c1143160 100644
--- a/src/python/nimbusml/pipeline.py
+++ b/src/python/nimbusml/pipeline.py
@@ -19,6 +19,7 @@
from scipy.sparse import csr_matrix
from sklearn.utils.validation import check_X_y, check_array
from sklearn.utils.multiclass import unique_labels
+from zipfile import ZipFile
from .internal.core.base_pipeline_item import BasePipelineItem
from .internal.entrypoints.data_customtextloader import \
@@ -40,6 +41,8 @@
from .internal.entrypoints.models_summarizer import models_summarizer
from .internal.entrypoints.transforms_datasetscorer import \
transforms_datasetscorer
+from .internal.entrypoints.transforms_datasettransformscorer import \
+ transforms_datasettransformscorer
from .internal.entrypoints.transforms_featurecombiner import \
transforms_featurecombiner
from .internal.entrypoints.transforms_featurecontributioncalculationtransformer import \
@@ -1816,22 +1819,44 @@ def _predict(self, X, y=None,
isinstance(X, DataFrame) and isinstance(y, (str, tuple))):
y = y_temp
+ is_transformer_chain = False
+ with ZipFile(self.model) as model_zip:
+ is_transformer_chain = any('TransformerChain' in item
+ for item in model_zip.namelist())
+
all_nodes = []
- inputs = dict([('data', ''), ('predictor_model', self.model)])
- if isinstance(X, FileDataStream):
- importtext_node = data_customtextloader(
- input_file="$file",
+ if is_transformer_chain:
+ inputs = dict([('data', ''), ('transform_model', self.model)])
+ if isinstance(X, FileDataStream):
+ importtext_node = data_customtextloader(
+ input_file="$file",
+ data="$data",
+ custom_schema=schema.to_string(
+ add_sep=True))
+ all_nodes = [importtext_node]
+ inputs = dict([('file', ''), ('transform_model', self.model)])
+
+ score_node = transforms_datasettransformscorer(
data="$data",
- custom_schema=schema.to_string(
- add_sep=True))
- all_nodes = [importtext_node]
- inputs = dict([('file', ''), ('predictor_model', self.model)])
-
- score_node = transforms_datasetscorer(
- data="$data",
- predictor_model="$predictor_model",
- scored_data="$scoredVectorData")
- all_nodes.extend([score_node])
+ transform_model="$transform_model",
+ scored_data="$scoredVectorData")
+ all_nodes.extend([score_node])
+ else:
+ inputs = dict([('data', ''), ('predictor_model', self.model)])
+ if isinstance(X, FileDataStream):
+ importtext_node = data_customtextloader(
+ input_file="$file",
+ data="$data",
+ custom_schema=schema.to_string(
+ add_sep=True))
+ all_nodes = [importtext_node]
+ inputs = dict([('file', ''), ('predictor_model', self.model)])
+
+ score_node = transforms_datasetscorer(
+ data="$data",
+ predictor_model="$predictor_model",
+ scored_data="$scoredVectorData")
+ all_nodes.extend([score_node])
if (evaltype in ['binary', 'multiclass']) or \
(hasattr(self, 'steps')
@@ -1889,6 +1914,10 @@ def _predict(self, X, y=None,
self._run_time = time.time() - start_time
raise e
+ if is_transformer_chain:
+ out_data['PredictedLabel'] = out_data['PredictedLabel']*1
+
+
if y is not None:
# We need to fix the schema for ranking metrics
if evaltype == 'ranking':