Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/python/nimbusml/internal/core/base_pipeline_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,8 @@ def _get_node(self, **params):
def __getstate__(self):
"Selects what to pickle."
odict = self.__dict__.copy()
odict['export_version'] = 1

if hasattr(self, 'model_') and \
self.model_ is not None and os.path.isfile(self.model_):
with open(self.model_, "rb") as mfile:
Expand All @@ -387,8 +389,11 @@ def __getstate__(self):
def __setstate__(self, state):
"Restore a pickled object."
for k, v in state.items():
if k not in {'modelbytes', 'type'}:
if k not in {'modelbytes', 'type', 'export_version'}:
setattr(self, k, v)

# Note: modelbytes and type were
# added before export_version 1
if 'modelbytes' in state:
(fd, modelfile) = tempfile.mkstemp()
fl = os.fdopen(fd, "wb")
Expand Down
33 changes: 33 additions & 0 deletions src/python/nimbusml/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import inspect
import itertools
import os
import tempfile
import time
import warnings
from collections import OrderedDict, namedtuple, defaultdict
Expand Down Expand Up @@ -2265,6 +2266,38 @@ def load_model(self, src):
self.model = src
self.steps = []

def __getstate__(self):
odict = {'export_version': 1}

if hasattr(self, 'steps'):
odict['steps'] = self.steps

if (hasattr(self, 'model') and
self.model is not None and
os.path.isfile(self.model)):

with open(self.model, "rb") as f:
odict['modelbytes'] = f.read()

return odict

def __setstate__(self, state):
self.steps = []
self.model = None
self.random_state = None

for k, v in state.items():
if k not in {'modelbytes', 'export_version'}:
setattr(self, k, v)

if state.get('export_version', 0) == 1:
if 'modelbytes' in state:
(fd, modelfile) = tempfile.mkstemp()
fl = os.fdopen(fd, "wb")
fl.write(state['modelbytes'])
fl.close()
self.model = modelfile

@trace
def score(
self,
Expand Down
111 changes: 107 additions & 4 deletions src/python/nimbusml/tests/pipeline/test_load_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------

import os
import pickle
import unittest

Expand Down Expand Up @@ -44,8 +45,14 @@ def test_model_dataframe(self):
model_nimbusml.fit(train, label)

# Save with pickle
pickle.dump(model_nimbusml, open('nimbusml_model.p', 'wb'))
model_nimbusml_pickle = pickle.load(open("nimbusml_model.p", "rb"))
pickle_filename = 'nimbusml_model.p'
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)

with open(pickle_filename, "rb") as f:
model_nimbusml_pickle = pickle.load(f)

os.remove(pickle_filename)

score1 = model_nimbusml.predict(test).head(5)
score2 = model_nimbusml_pickle.predict(test).head(5)
Expand All @@ -72,6 +79,8 @@ def test_model_dataframe(self):
model_nimbusml_load.sum().sum(),
decimal=2)

os.remove('model.nimbusml.m')

def test_model_datastream(self):
model_nimbusml = Pipeline(
steps=[
Expand All @@ -85,8 +94,14 @@ def test_model_datastream(self):
model_nimbusml.fit(train, label)

# Save with pickle
pickle.dump(model_nimbusml, open('nimbusml_model.p', 'wb'))
model_nimbusml_pickle = pickle.load(open("nimbusml_model.p", "rb"))
pickle_filename = 'nimbusml_model.p'
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)

with open(pickle_filename, "rb") as f:
model_nimbusml_pickle = pickle.load(f)

os.remove(pickle_filename)

score1 = model_nimbusml.predict(test).head(5)
score2 = model_nimbusml_pickle.predict(test).head(5)
Expand Down Expand Up @@ -119,6 +134,94 @@ def test_model_datastream(self):
model_nimbusml_load.sum().sum(),
decimal=2)

os.remove('model.nimbusml.m')

def test_pipeline_saves_complete_model_file_when_pickled(self):
model_nimbusml = Pipeline(
steps=[
('cat',
OneHotVectorizer() << categorical_columns),
('linear',
FastLinearBinaryClassifier(
shuffle=False,
number_of_threads=1))])

model_nimbusml.fit(train, label)
metrics, score = model_nimbusml.test(test, test_label, output_scores=True)

pickle_filename = 'nimbusml_model.p'

# Save with pickle
with open(pickle_filename, 'wb') as f:
pickle.dump(model_nimbusml, f)

# Remove the pipeline model from disk so
# that the unpickled pipeline is forced
# to get its model from the pickled file.
os.remove(model_nimbusml.model)

with open(pickle_filename, "rb") as f:
model_nimbusml_pickle = pickle.load(f)

os.remove(pickle_filename)

metrics_pickle, score_pickle = model_nimbusml_pickle.test(
test, test_label, output_scores=True)

assert_almost_equal(score.sum().sum(),
score_pickle.sum().sum(),
decimal=2)

assert_almost_equal(metrics.sum().sum(),
metrics_pickle.sum().sum(),
decimal=2)

def test_unfitted_pickled_pipeline_can_be_fit(self):
pipeline = Pipeline(
steps=[
('cat',
OneHotVectorizer() << categorical_columns),
('linear',
FastLinearBinaryClassifier(
shuffle=False,
number_of_threads=1))])

pipeline.fit(train, label)
metrics, score = pipeline.test(test, test_label, output_scores=True)

# Create a new unfitted pipeline
pipeline = Pipeline(
steps=[
('cat',
OneHotVectorizer() << categorical_columns),
('linear',
FastLinearBinaryClassifier(
shuffle=False,
number_of_threads=1))])

pickle_filename = 'nimbusml_model.p'

# Save with pickle
with open(pickle_filename, 'wb') as f:
pickle.dump(pipeline, f)

with open(pickle_filename, "rb") as f:
pipeline_pickle = pickle.load(f)

os.remove(pickle_filename)

pipeline_pickle.fit(train, label)
metrics_pickle, score_pickle = pipeline_pickle.test(
test, test_label, output_scores=True)

assert_almost_equal(score.sum().sum(),
score_pickle.sum().sum(),
decimal=2)

assert_almost_equal(metrics.sum().sum(),
metrics_pickle.sum().sum(),
decimal=2)


if __name__ == '__main__':
unittest.main()
6 changes: 6 additions & 0 deletions src/python/nimbusml/tests/scikit/test_uci_adult_scikit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------

import os
import pickle
import unittest

Expand Down Expand Up @@ -111,6 +112,7 @@ def test_pickle_predictor(self):
# Unpickle model and score. We should get the exact same accuracy as
# above
s = pickle.dumps(ftree)
os.remove(ftree.model_)
ftree2 = pickle.loads(s)
scores2 = ftree2.predict(X_test)
accu2 = np.mean(y_test.values.ravel() == scores2.values)
Expand All @@ -130,6 +132,7 @@ def test_pickle_transform(self):
# Unpickle transform and generate output.
# We should get the exact same output as above
s = pickle.dumps(cat)
os.remove(cat.model_)
cat2 = pickle.loads(s)
out2 = cat2.transform(X_train)
assert_equal(
Expand Down Expand Up @@ -158,7 +161,10 @@ def test_pickle_pipeline(self):
# Unpickle model and score. We should get the exact same accuracy as
# above
s = pickle.dumps(pipe)
os.remove(cat.model_)
os.remove(ftree.model_)
pipe2 = pickle.loads(s)

scores2 = pipe2.predict(X_test)
accu2 = np.mean(y_test.values.ravel() == scores2.values)
assert_equal(
Expand Down