Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion release-next.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,16 @@
[PR#232](https://github.com/microsoft/NimbusML/pull/232)
Enable passing python executable to dataprep package, so dataprep can execute python transformations

- **Fixed `Pipeline.transform()` failing in a transform-only `Pipeline` when the y column is provided**

[PR#294](https://github.com/microsoft/NimbusML/pull/294)
Enable calling `.transform()` on a `Pipeline` containing only transforms when the y column is provided

## **Breaking Changes**

None.
- **Removed `y` parameter from `Pipeline.transform()`**
[PR#294](https://github.com/microsoft/NimbusML/pull/294)
Removed the `y` parameter from `Pipeline.transform()`, as it is neither needed nor used when transforming data with a fitted `Pipeline`.

## **Enhancements**

Expand Down
1 change: 1 addition & 0 deletions src/python/nimbusml.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,7 @@
<Compile Include="nimbusml\tests\pipeline\test_pipeline_split_models.py" />
<Compile Include="nimbusml\tests\pipeline\test_pipeline_combining.py" />
<Compile Include="nimbusml\tests\pipeline\test_pipeline_subclassing.py" />
<Compile Include="nimbusml\tests\pipeline\test_pipeline_transform_method.py" />
<Compile Include="nimbusml\tests\preprocessing\normalization\test_lpscaler.py" />
<Compile Include="nimbusml\tests\preprocessing\normalization\test_meanvariancescaler.py" />
<Compile Include="nimbusml\tests\preprocessing\schema\test_prefixcolumnconcatenator.py" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
# LightLda: cluster topics
import pandas
from nimbusml import Pipeline
from nimbusml.feature_extraction.text import LightLda
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram
from nimbusml.feature_extraction.text import LightLda, NGramFeaturizer
from nimbusml.feature_extraction.text.extractor import Ngram

# create the data
topics = pandas.DataFrame(data=dict(review=[
Expand All @@ -19,7 +18,7 @@

# there are three main topics in our data. set num_topic=3
# and see if LightLDA vectors for topics look similar
pipeline = Pipeline([NGramFeaturizer(word_feature_extractor=n_gram(
pipeline = Pipeline([NGramFeaturizer(word_feature_extractor=Ngram(
), vector_normalizer='None') << 'review', LightLda(num_topic=3)])
y = pipeline.fit_transform(topics)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Example with TextTransform and LogisticRegressionBinaryClassifier
import pandas
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram
from nimbusml.feature_extraction.text.extractor import Ngram
from nimbusml.linear_model import LogisticRegressionBinaryClassifier

train_reviews = pandas.DataFrame(
Expand Down Expand Up @@ -77,7 +77,7 @@
y = train_reviews['like']
X = train_reviews.loc[:, train_reviews.columns != 'like']

ngram = NGramFeaturizer(word_feature_extractor=n_gram()) << 'review'
ngram = NGramFeaturizer(word_feature_extractor=Ngram()) << 'review'
X = ngram.fit_transform(X)

# view the transformed numerical values and column names
Expand Down
14 changes: 1 addition & 13 deletions src/python/nimbusml/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2254,7 +2254,6 @@ def test(
def transform(
self,
X,
y=None,
verbose=0,
as_binary_data_stream=False,
**params):
Expand All @@ -2275,18 +2274,7 @@ def transform(
"Model is not fitted. Train or load a model before test("
").")

if y is not None:
if len(self.steps) > 0:
last_node = self.last_node
if last_node.type == 'transform':
raise ValueError(
"Pipeline needs a trainer as last step for test()")

X, y_temp, columns_renamed, feature_columns, label_column, \
schema, weights, weight_column = self._preprocess_X_y(X, y)

if not isinstance(y, (str, tuple)):
Copy link
Member

@ganik ganik Oct 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tuple [](start = 35, length = 5)

When could y be a tuple? How is y_temp used now? #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

y_temp is not used at all in this method.


In reply to: 330999871 [](ancestors = 330999871)

y = y_temp
X, _, _, _, _, schema, _, _ = self._preprocess_X_y(X)
Copy link
Member

@ganik ganik Oct 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_preprocess_X_y [](start = 43, length = 15)

Do you need to call this at all? #Resolved


all_nodes = []

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------
import unittest

import pandas
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.text import NGramFeaturizer

# Module-level fixture: resolve the "wiki_detox_train" dataset to a file path
# when this module is imported; the test class below reads X.
path = get_dataset("wiki_detox_train").as_filepath()
# The dataset file is read as tab-separated values.
data = FileDataStream.read_csv(path, sep='\t')
# Keep only the rows returned by head()
# (presumably a pandas DataFrame -- confirm against FileDataStream.to_df).
df = data.to_df().head()
# The single 'SentimentText' column is the only input handed to the pipeline.
X = df['SentimentText']

class TestPipelineTransformMethod(unittest.TestCase):
    """Checks ``Pipeline.transform()`` on a pipeline that has no trainer step."""

    def test_transform_only_pipeline_transform_method(self):
        """Fit a featurizer-only pipeline on X, then transform the same X.

        The pipeline contains a single ``NGramFeaturizer`` bound to the
        'SentimentText' column, and both ``fit`` and ``transform`` are
        called with X alone (no label column).
        """
        p = Pipeline([NGramFeaturizer(char_feature_extractor=None) << 'SentimentText'])
        p.fit(X)
        xf = p.transform(X)
        # assertIn instead of a bare ``assert``: it is not stripped under
        # ``python -O`` and it reports the missing member on failure.
        self.assertIn('SentimentText.==rude==', xf.columns)

if __name__ == '__main__':
unittest.main()