Skip to content
This repository has been archived by the owner on Jun 22, 2022. It is now read-only.

Commit

Permalink
Revert "removed suffixes (#110)" (#111)
Browse files Browse the repository at this point in the history
This reverts commit 520a17a.
  • Loading branch information
Kamil A. Kaczmarek authored Oct 5, 2018
1 parent 520a17a commit 99275ea
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 37 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = '0.1'
# The full version, including alpha/beta/rc tags
release = '0.1.12'
release = '0.1.11'


# -- General configuration ---------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@

setup(name='steppy',
packages=['steppy'],
version='0.1.12',
version='0.1.11',
description='A lightweight, open-source, Python library for fast and reproducible experimentation',
long_description=long_description,
url='https://github.com/minerva-ml/steppy',
download_url='https://github.com/minerva-ml/steppy/archive/0.1.12.tar.gz',
download_url='https://github.com/minerva-ml/steppy/archive/0.1.11.tar.gz',
author='Kamil A. Kaczmarek, Jakub Czakon',
author_email='[email protected], [email protected]',
keywords=['machine-learning', 'reproducibility', 'pipeline', 'data-science'],
Expand Down
82 changes: 48 additions & 34 deletions steppy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
'load_persisted_output': False
}

# Module-level registry of Step names. Step.__init__ appends each new Step's
# suffixed name here, and _apply_suffix scans it to pick the next suffix.
_ALL_STEPS_NAMES = []


class Step:
"""Step is a building block of steppy pipelines.
Expand Down Expand Up @@ -178,42 +180,41 @@ def __init__(self,
cache_output=False,
load_persisted_output=False):

self.name = self._format_step_name(name, transformer)
name = self._format_step_name(name, transformer)

if experiment_directory is not None:
assert isinstance(experiment_directory, str),\
'Step {} error, experiment_directory must ' \
'be str, got {} instead.'.format(self.name, type(experiment_directory))
'be str, got {} instead.'.format(name, type(experiment_directory))
else:
experiment_directory = os.path.join(os.path.expanduser("~"), '.steppy')
logger.info('Using default experiment directory: {}'.format(experiment_directory))

if output_directory is not None:
assert isinstance(output_directory, str),\
'Step {}, output_directory must be str, got {} instead'.format(self.name, type(output_directory))
'Step {}, output_directory must be str, got {} instead'.format(name, type(output_directory))

if input_data is not None:
assert isinstance(input_data, list), 'Step {} error, input_data must be list, ' \
'got {} instead.'.format(self.name, type(input_data))
'got {} instead.'.format(name, type(input_data))
if input_steps is not None:
assert isinstance(input_steps, list), 'Step {} error, input_steps must be list, ' \
'got {} instead.'.format(self.name, type(input_steps))
'got {} instead.'.format(name, type(input_steps))
if adapter is not None:
assert isinstance(adapter, Adapter), 'Step {} error, adapter must be an instance ' \
'of {}'.format(self.name, str(Adapter))
'of {}'.format(name, str(Adapter))

assert isinstance(cache_output, bool), 'Step {} error, cache_output must be bool, ' \
'got {} instead.'.format(self.name, type(cache_output))
'got {} instead.'.format(name, type(cache_output))
assert isinstance(persist_output, bool), 'Step {} error, persist_output must be bool, ' \
'got {} instead.'.format(self.name, type(persist_output))
'got {} instead.'.format(name, type(persist_output))
assert isinstance(load_persisted_output, bool),\
'Step {} error, load_persisted_output ' \
'must be bool, got {} instead.'.format(self.name, type(load_persisted_output))
'must be bool, got {} instead.'.format(name, type(load_persisted_output))
assert isinstance(force_fitting, bool), 'Step {} error, force_fitting must be bool, ' \
'got {} instead.'.format(self.name, type(force_fitting))
'got {} instead.'.format(name, type(force_fitting))

self._validate_upstream_names()
logger.info('Initializing Step {}'.format(self.name))
logger.info('Initializing Step {}'.format(name))

self.transformer = transformer
self.output_directory = output_directory
Expand All @@ -227,7 +228,11 @@ def __init__(self,
self.force_fitting = force_fitting

self.output = None
self.name = self._apply_suffix(name)
_ALL_STEPS_NAMES.append(self.name)

self.experiment_directory = os.path.join(experiment_directory)

self._prepare_experiment_directories()
self._mode = 'train'

Expand Down Expand Up @@ -487,7 +492,7 @@ def get_step_by_name(self, name):
return self.all_upstream_steps[name]
except KeyError as e:
msg = 'No Step with name "{}" found. ' \
'You have following Steps: {}'.format(name, list(self.all_upstream_steps.keys()))
'You have following Steps: {}'.format(name, _ALL_STEPS_NAMES)
raise StepError(msg) from e

def persist_upstream_structure(self):
Expand Down Expand Up @@ -520,8 +525,9 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -531,8 +537,9 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.fit_transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error ' \
'during "fit_transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, fitting and transforming completed'.format(self.name))
Expand All @@ -545,8 +552,10 @@ def _fit_transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'This Transformer is not fittable. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -570,8 +579,9 @@ def _transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand All @@ -585,8 +595,10 @@ def _transform_operation(self, step_inputs):
try:
step_output_data = self.transformer.transform(**step_inputs)
except Exception as e:
msg = 'Step {}, Transformer "{}" error ' \
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
'This Transformer is not fittable. ' \
'Check "Step.transformer" implementation"'.format(self.name,
self.transformer.__class__.__name__)
raise StepError(msg) from e

logger.info('Step {}, transforming completed'.format(self.name))
Expand Down Expand Up @@ -640,7 +652,6 @@ def _prepare_experiment_directories(self):
os.makedirs(os.path.join(self.experiment_directory, dir_name), exist_ok=True)

def _get_steps(self, all_steps):
self._check_name_uniqueness(all_steps=all_steps)
for input_step in self.input_steps:
all_steps = input_step._get_steps(all_steps)
all_steps[self.name] = self
Expand All @@ -659,16 +670,19 @@ def _validate_step_name(self, name):
assert isinstance(name, str) or isinstance(name, float) or isinstance(name, int),\
'Step name must be str, float or int. Got {} instead.'.format(type(name))

def _check_name_uniqueness(self, all_steps):
    """Ensure this Step's name is not already present in ``all_steps``.

    Args:
        all_steps: mapping whose keys are the Step names collected so far.

    Raises:
        ValueError: if ``self.name`` is already one of the keys.
    """
    name_taken = self.name in all_steps.keys()
    if not name_taken:
        return
    raise ValueError('Step with name "{}", already exist. Assign unique Step name.'.format(self.name))

def _validate_upstream_names(self):
    """Access ``self.all_upstream_steps`` and report name problems as StepError.

    Raises:
        StepError: if reading ``self.all_upstream_steps`` raises ValueError
            (presumably from a duplicate-name check; confirm against the
            ``all_upstream_steps`` implementation). The original ValueError is
            chained as the cause.
    """
    try:
        # Only the access matters here; the keys themselves are discarded.
        self.all_upstream_steps.keys()
    except ValueError as err:
        raise StepError('Incorrect Step names') from err
def _apply_suffix(self, name):
    """Return ``name`` with a numeric suffix ``'_k'`` appended.

    ``k`` is one greater than the highest numeric suffix already registered
    in ``_ALL_STEPS_NAMES`` for the same base name, or 0 when no such entry
    exists.

    Args:
        name: base Step name, without suffix.

    Returns:
        str: the suffixed name, formatted as ``'<name>_<k>'``.
    """
    next_id = 0
    for registered in _ALL_STEPS_NAMES:
        # Split on the LAST underscore so base names that themselves contain
        # underscores (e.g. 'my_step_3') are handled correctly.
        base, separator, tail = registered.rpartition('_')
        if not separator or base != name:
            continue
        try:
            taken_id = int(tail)
        except ValueError:
            # The tail is not a number, so this entry is a different name that
            # merely shares the prefix; it must not break suffix assignment.
            continue
        # Take the maximum rather than counting matches, so the result does
        # not depend on the order of entries in the registry.
        next_id = max(next_id, taken_id + 1)
    return '{}_{}'.format(name, next_id)

def _build_structure_dict(self, structure_dict):
for input_step in self.input_steps:
Expand Down

0 comments on commit 99275ea

Please sign in to comment.