diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index dd26ebf99..6e52f0d6a 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -54,7 +54,9 @@ setup_logger, start_log_server, ) +from autoPyTorch.utils.parallel import preload_modules from autoPyTorch.utils.pipeline import get_configuration_space, get_dataset_requirements +from autoPyTorch.utils.single_thread_client import SingleThreadedClient from autoPyTorch.utils.stopwatch import StopWatch @@ -190,7 +192,16 @@ def __init__( self.stop_logging_server = None # type: Optional[multiprocessing.synchronize.Event] + # Single core, local runs should use fork + # to prevent the __main__ requirements in + # examples. Nevertheless, multi-process runs + # have spawn as requirement to reduce the + # possibility of a deadlock self._dask_client = None + self._multiprocessing_context = 'forkserver' + if self.n_jobs == 1: + self._multiprocessing_context = 'fork' + self._dask_client = SingleThreadedClient() self.search_space_updates = search_space_updates if search_space_updates is not None: @@ -300,7 +311,8 @@ def _get_logger(self, name: str) -> PicklableClientLogger: # under the above logging configuration setting # We need to specify the logger_name so that received records # are treated under the logger_name ROOT logger setting - context = multiprocessing.get_context('spawn') + context = multiprocessing.get_context(self._multiprocessing_context) + preload_modules(context) self.stop_logging_server = context.Event() port = context.Value('l') # be safe by using a long port.value = -1 @@ -505,6 +517,7 @@ def _do_dummy_prediction(self) -> None: stats = Stats(scenario_mock) stats.start_timing() ta = ExecuteTaFuncWithQueue( + pynisher_context=self._multiprocessing_context, backend=self._backend, seed=self.seed, metric=self._metric, @@ -599,6 +612,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: stats = Stats(scenario_mock) stats.start_timing() ta = ExecuteTaFuncWithQueue( + pynisher_context=self._multiprocessing_context, backend=self._backend, seed=self.seed, metric=self._metric, @@ -929,6 +943,7 @@ def _search( random_state=self.seed, precision=precision, logger_port=self._logger_port, + pynisher_context=self._multiprocessing_context, ) self._stopwatch.stop_task(ensemble_task_name) @@ -969,6 +984,7 @@ def _search( start_num_run=self._backend.get_next_num_run(peek=True), search_space_updates=self.search_space_updates, portfolio_selection=portfolio_selection, + pynisher_context=self._multiprocessing_context, ) try: run_history, self.trajectory, budget_type = \ @@ -1299,5 +1315,6 @@ def _print_debug_info_to_log(self) -> None: self._logger.debug(' System: %s', platform.system()) self._logger.debug(' Machine: %s', platform.machine()) self._logger.debug(' Platform: %s', platform.platform()) + self._logger.debug(' multiprocessing_context: %s', str(self._multiprocessing_context)) for key, value in vars(self).items(): self._logger.debug(f"\t{key}->{value}") diff --git a/autoPyTorch/ensemble/ensemble_builder.py b/autoPyTorch/ensemble/ensemble_builder.py index 8be96a339..fc9f0d054 100644 --- a/autoPyTorch/ensemble/ensemble_builder.py +++ b/autoPyTorch/ensemble/ensemble_builder.py @@ -36,6 +36,7 @@ from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric from autoPyTorch.pipeline.components.training.metrics.utils import calculate_loss, calculate_score from autoPyTorch.utils.logging_ import get_named_client_logger +from autoPyTorch.utils.parallel import 
preload_modules Y_ENSEMBLE = 0 Y_TEST = 1 @@ -64,6 +65,7 @@ def __init__( ensemble_memory_limit: Optional[int], random_state: int, logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + pynisher_context: str = 'fork', ): """ SMAC callback to handle ensemble building Args: @@ -111,6 +113,8 @@ def __init__( read at most n new prediction files in each iteration logger_port: int port in where to publish a msg + pynisher_context: str + The multiprocessing context for pynisher. One of spawn/fork/forkserver. Returns: List[Tuple[int, float, float, float]]: @@ -135,6 +139,7 @@ def __init__( self.ensemble_memory_limit = ensemble_memory_limit self.random_state = random_state self.logger_port = logger_port + self.pynisher_context = pynisher_context # Store something similar to SMAC's runhistory self.history = [] # type: List[Dict[str, float]] @@ -160,7 +165,6 @@ def __call__( def build_ensemble( self, dask_client: dask.distributed.Client, - pynisher_context: str = 'spawn', unit_test: bool = False ) -> None: @@ -236,7 +240,7 @@ def build_ensemble( iteration=self.iteration, return_predictions=False, priority=100, - pynisher_context=pynisher_context, + pynisher_context=self.pynisher_context, logger_port=self.logger_port, unit_test=unit_test, )) @@ -585,11 +589,11 @@ def __init__( def run( self, iteration: int, + pynisher_context: str, time_left: Optional[float] = None, end_at: Optional[float] = None, time_buffer: int = 5, return_predictions: bool = False, - pynisher_context: str = 'spawn', # only change for unit testing! ) -> Tuple[ List[Dict[str, float]], int, @@ -655,6 +659,7 @@ def run( if wall_time_in_s < 1: break context = multiprocessing.get_context(pynisher_context) + preload_modules(context) safe_ensemble_script = pynisher.enforce_limits( wall_time_in_s=wall_time_in_s, diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 9c3ec2635..32e869ba7 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -29,6 +29,7 @@ from autoPyTorch.utils.common import replace_string_bool_to_bool from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger +from autoPyTorch.utils.parallel import preload_modules def fit_predict_try_except_decorator( @@ -92,29 +93,29 @@ class ExecuteTaFuncWithQueue(AbstractTAFunc): """ def __init__( - self, - backend: Backend, - seed: int, - metric: autoPyTorchMetric, - cost_for_crash: float, - abort_on_first_run_crash: bool, - pipeline_config: typing.Optional[typing.Dict[str, typing.Any]] = None, - initial_num_run: int = 1, - stats: typing.Optional[Stats] = None, - run_obj: str = 'quality', - par_factor: int = 1, - output_y_hat_optimization: bool = True, - include: typing.Optional[typing.Dict[str, typing.Any]] = None, - exclude: typing.Optional[typing.Dict[str, typing.Any]] = None, - memory_limit: typing.Optional[int] = None, - disable_file_output: bool = False, - init_params: typing.Dict[str, typing.Any] = None, - budget_type: str = None, - ta: typing.Optional[typing.Callable] = None, - logger_port: int = None, - all_supported_metrics: bool = True, - pynisher_context: str = 'spawn', - search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None + self, + backend: Backend, + seed: int, + metric: autoPyTorchMetric, + cost_for_crash: float, + abort_on_first_run_crash: bool, + pynisher_context: str, + pipeline_config: typing.Optional[typing.Dict[str, typing.Any]] = None, + 
initial_num_run: int = 1, + stats: typing.Optional[Stats] = None, + run_obj: str = 'quality', + par_factor: int = 1, + output_y_hat_optimization: bool = True, + include: typing.Optional[typing.Dict[str, typing.Any]] = None, + exclude: typing.Optional[typing.Dict[str, typing.Any]] = None, + memory_limit: typing.Optional[int] = None, + disable_file_output: bool = False, + init_params: typing.Dict[str, typing.Any] = None, + budget_type: str = None, + ta: typing.Optional[typing.Callable] = None, + logger_port: int = None, + all_supported_metrics: bool = True, + search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None ): eval_function = autoPyTorch.evaluation.train_evaluator.eval_function @@ -249,6 +250,7 @@ def run( ) -> typing.Tuple[StatusType, float, float, typing.Dict[str, typing.Any]]: context = multiprocessing.get_context(self.pynisher_context) + preload_modules(context) queue: multiprocessing.queues.Queue = context.Queue() if not (instance_specific is None or instance_specific == '0'): diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py index 2ae894e8b..d1cd7d55d 100644 --- a/autoPyTorch/optimizer/smbo.py +++ b/autoPyTorch/optimizer/smbo.py @@ -109,7 +109,8 @@ def __init__(self, ensemble_callback: typing.Optional[EnsembleBuilderManager] = None, logger_port: typing.Optional[int] = None, search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None, - portfolio_selection: typing.Optional[str] = None + portfolio_selection: typing.Optional[str] = None, + pynisher_context: str = 'spawn', ): """ Interface to SMAC. This method calls the SMAC optimize method, and allows @@ -156,6 +157,8 @@ def __init__(self, Additional arguments to the smac scenario get_smac_object_callback (typing.Optional[typing.Callable]): Allows to create a user specified SMAC object + pynisher_context (str): + A string indicating the multiprocessing context to use ensemble_callback (typing.Optional[EnsembleBuilderManager]): A callback used in this scenario to start ensemble building subtasks portfolio_selection (str), (default=None): @@ -204,6 +207,7 @@ def __init__(self, self.disable_file_output = disable_file_output self.smac_scenario_args = smac_scenario_args self.get_smac_object_callback = get_smac_object_callback + self.pynisher_context = pynisher_context self.ensemble_callback = ensemble_callback @@ -274,7 +278,8 @@ def run_smbo(self, func: typing.Optional[typing.Callable] = None logger_port=self.logger_port, all_supported_metrics=self.all_supported_metrics, pipeline_config=self.pipeline_config, - search_space_updates=self.search_space_updates + search_space_updates=self.search_space_updates, + pynisher_context=self.pynisher_context, ) ta = ExecuteTaFuncWithQueue self.logger.info("Created TA") diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py index e69de29bb..33dd0cd32 100644 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py @@ -0,0 +1,106 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + 
ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer + + +normalise_directory = os.path.split(__file__)[0] +_normalizers = find_components(__package__, + normalise_directory, + BaseNormalizer) + +_addons = ThirdPartyComponents(BaseNormalizer) + + +def add_normalizer(normalizer: BaseNormalizer) -> None: + _addons.add_component(normalizer) + + +class NormalizerChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing normalizer component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available normalizer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseNormalizer components available + as choices for encoding the categorical columns + """ + components = OrderedDict() + components.update(_normalizers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_preprocessors) == 0: + raise ValueError("no image normalizers found, please add an image normalizer") + + if default is None: + defaults = ['ImageNormalizer', 'NoNormalizer'] + for default_ in defaults: + if default_ in available_preprocessors: + if include is not None and default_ not in include: + continue + if exclude is not None and default_ in exclude: + continue + default = default_ + break + + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_preprocessors): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_preprocessors, + choice_hyperparameter.value_range)) + preprocessor = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_preprocessors.keys()), + default_value=default) + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of preprocessor choices + for name in preprocessor.choices: + preprocessor_configuration_space = available_preprocessors[name].\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py deleted file mode 100644 index acc9f5b64..000000000 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py 
+++ /dev/null @@ -1,106 +0,0 @@ -import os -from collections import OrderedDict -from typing import Any, Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer - - -normalise_directory = os.path.split(__file__)[0] -_normalizers = find_components(__package__, - normalise_directory, - BaseNormalizer) - -_addons = ThirdPartyComponents(BaseNormalizer) - - -def add_normalizer(normalizer: BaseNormalizer) -> None: - _addons.add_component(normalizer) - - -class NormalizerChoice(autoPyTorchChoice): - """ - Allows for dynamically choosing encoding component at runtime - """ - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available normalizer components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all BaseNormalise components available - as choices for encoding the categorical columns - """ - components = OrderedDict() - components.update(_normalizers) - components.update(_addons.components) - return components - - def get_hyperparameter_search_space(self, - dataset_properties: Optional[Dict[str, Any]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None) -> ConfigurationSpace: - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = dict() - - dataset_properties = {**self.dataset_properties, **dataset_properties} - - available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, - include=include, - exclude=exclude) - - if len(available_preprocessors) == 0: - raise ValueError("no image normalizers found, please add an image normalizer") - - if default is None: - defaults = ['ImageNormalizer', 'NoNormalizer'] - for default_ in defaults: - if default_ in available_preprocessors: - if include is not None and default_ not in include: - continue - if exclude is not None and default_ in exclude: - continue - default = default_ - break - - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_preprocessors): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_preprocessors, - choice_hyperparameter.value_range)) - preprocessor = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - preprocessor = CSH.CategoricalHyperparameter('__choice__', - list(available_preprocessors.keys()), - default_value=default) - cs.add_hyperparameter(preprocessor) - - # add only child hyperparameters of early_preprocessor choices - for name in preprocessor.choices: - preprocessor_configuration_space = available_preprocessors[name].\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) - - self.configuration_space = cs - self.dataset_properties = dataset_properties - return cs 
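
The base_task.py hunks above replace the hard-coded 'spawn' start method with a per-instance _multiprocessing_context: single-job local runs fall back to 'fork' (so the examples do not need a __main__ guard) and use a SingleThreadedClient in place of a Dask cluster, while multi-process runs use 'forkserver', and preload_modules(context) is called before the log-server and pynisher subprocesses are created. A minimal sketch of that pattern follows; the preload_modules body, the helper name get_preloaded_context and the package list are illustrative assumptions, not the exact helper added in autoPyTorch/utils/parallel.py, and fork/forkserver are POSIX-only start methods.

import multiprocessing
import sys
from multiprocessing.context import BaseContext


def preload_modules(context: BaseContext) -> None:
    # Hint the forkserver to import heavy, already-loaded packages once,
    # instead of re-importing them in every subprocess. A plain 'fork'
    # context never consults this list, so the call is harmless there.
    preload = [
        name for name in sys.modules.keys()
        if name.split('.')[0] in ('numpy', 'scipy', 'sklearn', 'torch', 'smac')
        and 'logging' not in name
    ]
    context.set_forkserver_preload(preload)


def get_preloaded_context(n_jobs: int) -> BaseContext:
    # Hypothetical helper mirroring the selection logic in BaseTask.__init__:
    # 'fork' for single-core runs, 'forkserver' for multi-process runs.
    name = 'fork' if n_jobs == 1 else 'forkserver'
    context = multiprocessing.get_context(name)
    preload_modules(context)
    return context

The chosen string is then threaded through as pynisher_context to ExecuteTaFuncWithQueue, EnsembleBuilderManager and AutoMLSMBO, so evaluation, ensemble-building and logging subprocesses all share one consistent start method.
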
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py index e69de29bb..f7399005a 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py @@ -0,0 +1,137 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder + + +encoding_directory = os.path.split(__file__)[0] +_encoders = find_components(__package__, + encoding_directory, + BaseEncoder) +_addons = ThirdPartyComponents(BaseEncoder) + + +def add_encoder(encoder: BaseEncoder) -> None: + _addons.add_component(encoder) + + +class EncoderChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing encoding component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available encoder components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseEncoder components available + as choices for encoding the categorical columns + """ + components = OrderedDict() + components.update(_encoders) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_preprocessors) == 0: + raise ValueError("no encoders found, please add a encoder") + + if default is None: + defaults = ['OneHotEncoder', 'NoEncoder'] + for default_ in defaults: + if default_ in available_preprocessors: + if include is not None and default_ not in include: + continue + if exclude is not None and default_ in exclude: + continue + default = default_ + break + + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_preprocessors): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_preprocessors, + choice_hyperparameter.value_range)) + if len(dataset_properties['categorical_columns']) == 0: + assert len(choice_hyperparameter.value_range) == 1 + assert 'NoEncoder' in choice_hyperparameter.value_range, \ + "Provided {} in choices, however, the dataset " \ + "is incompatible with it".format(choice_hyperparameter.value_range) + + preprocessor = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + # add only no encoder to 
choice hyperparameters in case the dataset is only numerical + if len(dataset_properties['categorical_columns']) == 0: + default = 'NoEncoder' + if include is not None and default not in include: + raise ValueError("Provided {} in include, however, the dataset " + "is incompatible with it".format(include)) + preprocessor = CSH.CategoricalHyperparameter('__choice__', + ['NoEncoder'], + default_value=default) + else: + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_preprocessors.keys()), + default_value=default) + + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of preprocessor choices + for name in preprocessor.choices: + preprocessor_configuration_space = available_preprocessors[name].\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + Args: + dataset_properties: + + """ + super()._check_dataset_properties(dataset_properties) + assert 'numerical_columns' in dataset_properties.keys(), \ + "Dataset properties must contain information about numerical columns" + assert 'categorical_columns' in dataset_properties.keys(), \ + "Dataset properties must contain information about categorical columns" diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py deleted file mode 100644 index 7ddbf8eaf..000000000 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py +++ /dev/null @@ -1,137 +0,0 @@ -import os -from collections import OrderedDict -from typing import Any, Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder - - -encoding_directory = os.path.split(__file__)[0] -_encoders = find_components(__package__, - encoding_directory, - BaseEncoder) -_addons = ThirdPartyComponents(BaseEncoder) - - -def add_encoder(encoder: BaseEncoder) -> None: - _addons.add_component(encoder) - - -class EncoderChoice(autoPyTorchChoice): - """ - Allows for dynamically choosing encoding component at runtime - """ - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available encoder components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all BaseEncoder components available - as choices for encoding the categorical columns - """ - components = OrderedDict() - components.update(_encoders) - components.update(_addons.components) - return components - - def get_hyperparameter_search_space(self, - dataset_properties: Optional[Dict[str, Any]] = None, - default: Optional[str] = None, - 
include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None) -> ConfigurationSpace: - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = dict() - - dataset_properties = {**self.dataset_properties, **dataset_properties} - - available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, - include=include, - exclude=exclude) - - if len(available_preprocessors) == 0: - raise ValueError("no encoders found, please add a encoder") - - if default is None: - defaults = ['OneHotEncoder', 'NoEncoder'] - for default_ in defaults: - if default_ in available_preprocessors: - if include is not None and default_ not in include: - continue - if exclude is not None and default_ in exclude: - continue - default = default_ - break - - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_preprocessors): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_preprocessors, - choice_hyperparameter.value_range)) - if len(dataset_properties['categorical_columns']) == 0: - assert len(choice_hyperparameter.value_range) == 1 - assert 'NoEncoder' in choice_hyperparameter.value_range, \ - "Provided {} in choices, however, the dataset " \ - "is incompatible with it".format(choice_hyperparameter.value_range) - - preprocessor = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - # add only no encoder to choice hyperparameters in case the dataset is only numerical - if len(dataset_properties['categorical_columns']) == 0: - default = 'NoEncoder' - if include is not None and default not in include: - raise ValueError("Provided {} in include, however, the dataset " - "is incompatible with it".format(include)) - preprocessor = CSH.CategoricalHyperparameter('__choice__', - ['NoEncoder'], - default_value=default) - else: - preprocessor = CSH.CategoricalHyperparameter('__choice__', - list(available_preprocessors.keys()), - default_value=default) - - cs.add_hyperparameter(preprocessor) - - # add only child hyperparameters of early_preprocessor choices - for name in preprocessor.choices: - preprocessor_configuration_space = available_preprocessors[name].\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) - - self.configuration_space = cs - self.dataset_properties = dataset_properties - return cs - - def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: - """ - A mechanism in code to ensure the correctness of the fit dictionary - It recursively makes sure that the children and parent level requirements - are honored before fit. 
- Args: - dataset_properties: - - """ - super()._check_dataset_properties(dataset_properties) - assert 'numerical_columns' in dataset_properties.keys(), \ - "Dataset properties must contain information about numerical columns" - assert 'categorical_columns' in dataset_properties.keys(), \ - "Dataset properties must contain information about categorical columns" diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py index e69de29bb..be678da94 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py @@ -0,0 +1,132 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ + base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent + +preprocessing_directory = os.path.split(__file__)[0] +_preprocessors = find_components(__package__, + preprocessing_directory, + autoPyTorchFeaturePreprocessingComponent) +_addons = ThirdPartyComponents(autoPyTorchFeaturePreprocessingComponent) + + +def add_feature_preprocessor(feature_preprocessor: autoPyTorchFeaturePreprocessingComponent) -> None: + _addons.add_component(feature_preprocessor) + + +class FeatureProprocessorChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing feature_preprocessor component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available feature_preprocessor components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all feature preprocessor components available + as choices for encoding the categorical columns + """ + components: Dict = OrderedDict() + components.update(_preprocessors) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_ = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_) == 0: + raise ValueError("no feature preprocessors found, please add a feature preprocessor") + + if default is None: + defaults = ['NoFeaturePreprocessor', + 'FastICA', + 'KernelPCA', + 'RandomKitchenSinks', + 'Nystroem', + 'PolynomialFeatures', + 'PowerTransformer', + 'TruncatedSVD', + ] + for default_ in defaults: + if default_ in available_: + if include is not None and default_ not in include: + continue + if exclude is not None and default_ in exclude: + continue + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + 
choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_, + choice_hyperparameter.value_range)) + if len(dataset_properties['numerical_columns']) == 0: + assert len(choice_hyperparameter.value_range) == 1 + assert 'NoFeaturePreprocessor' in choice_hyperparameter.value_range, \ + "Provided {} in choices, however, the dataset " \ + "is incompatible with it".format(choice_hyperparameter.value_range) + preprocessor = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + # add only no feature preprocessor to choice hyperparameters in case the dataset is only categorical + if len(dataset_properties['numerical_columns']) == 0: + default = 'NoFeaturePreprocessor' + if include is not None and default not in include: + raise ValueError("Provided {} in include, however, " + "the dataset is incompatible with it".format(include)) + preprocessor = CSH.CategoricalHyperparameter('__choice__', + ['NoFeaturePreprocessor'], + default_value=default) + else: + # Truncated SVD requires n_features > n_components + if len(dataset_properties['numerical_columns']) == 1: + del available_['TruncatedSVD'] + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_.keys()), + default_value=default) + + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of preprocessor choices + for name in preprocessor.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_[name].get_hyperparameter_search_space(dataset_properties, # type:ignore + **updates) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, config_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py deleted file mode 100644 index 43a1e1a66..000000000 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py +++ /dev/null @@ -1,132 +0,0 @@ -import os -from collections import OrderedDict -from typing import Any, Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. 
\ - base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent - -preprocessing_directory = os.path.split(__file__)[0] -_preprocessors = find_components(__package__, - preprocessing_directory, - autoPyTorchFeaturePreprocessingComponent) -_addons = ThirdPartyComponents(autoPyTorchFeaturePreprocessingComponent) - - -def add_feature_preprocessor(feature_preprocessor: autoPyTorchFeaturePreprocessingComponent) -> None: - _addons.add_component(feature_preprocessor) - - -class FeatureProprocessorChoice(autoPyTorchChoice): - """ - Allows for dynamically choosing feature_preprocessor component at runtime - """ - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available feature_preprocessor components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all feature preprocessor components available - as choices for encoding the categorical columns - """ - components: Dict = OrderedDict() - components.update(_preprocessors) - components.update(_addons.components) - return components - - def get_hyperparameter_search_space(self, - dataset_properties: Optional[Dict[str, Any]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None) -> ConfigurationSpace: - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = dict() - - dataset_properties = {**self.dataset_properties, **dataset_properties} - - available_ = self.get_available_components(dataset_properties=dataset_properties, - include=include, - exclude=exclude) - - if len(available_) == 0: - raise ValueError("no feature preprocessors found, please add a feature preprocessor") - - if default is None: - defaults = ['NoFeaturePreprocessor', - 'FastICA', - 'KernelPCA', - 'RandomKitchenSinks', - 'Nystroem', - 'PolynomialFeatures', - 'PowerTransformer', - 'TruncatedSVD', - ] - for default_ in defaults: - if default_ in available_: - if include is not None and default_ not in include: - continue - if exclude is not None and default_ in exclude: - continue - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_, - choice_hyperparameter.value_range)) - if len(dataset_properties['numerical_columns']) == 0: - assert len(choice_hyperparameter.value_range) == 1 - assert 'NoFeaturePreprocessor' in choice_hyperparameter.value_range, \ - "Provided {} in choices, however, the dataset " \ - "is incompatible with it".format(choice_hyperparameter.value_range) - preprocessor = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - # add only no feature preprocessor to choice hyperparameters in case the dataset is only categorical - if len(dataset_properties['numerical_columns']) == 0: - default = 'NoFeaturePreprocessor' - if include is not None and default not in include: - raise ValueError("Provided {} in include, however, " - "the dataset is incompatible with it".format(include)) - preprocessor = CSH.CategoricalHyperparameter('__choice__', - ['NoFeaturePreprocessor'], - default_value=default) - else: - # Truncated SVD requires n_features > n_components - if len(dataset_properties['numerical_columns']) == 1: - del available_['TruncatedSVD'] 
- preprocessor = CSH.CategoricalHyperparameter('__choice__', - list(available_.keys()), - default_value=default) - - cs.add_hyperparameter(preprocessor) - - # add only child hyperparameters of early_preprocessor choices - for name in preprocessor.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_[name].get_hyperparameter_search_space(dataset_properties, # type:ignore - **updates) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, config_space, - parent_hyperparameter=parent_hyperparameter) - - self.configuration_space = cs - self.dataset_properties = dataset_properties - return cs diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py index e69de29bb..3cdd81676 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py @@ -0,0 +1,131 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler + +scaling_directory = os.path.split(__file__)[0] +_scalers = find_components(__package__, + scaling_directory, + BaseScaler) + +_addons = ThirdPartyComponents(BaseScaler) + + +def add_scaler(scaler: BaseScaler) -> None: + _addons.add_component(scaler) + + +class ScalerChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing scaling component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available scaler components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseScalers components available + as choices for scaling + """ + components = OrderedDict() + components.update(_scalers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_scalers = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_scalers) == 0: + raise ValueError("no scalers found, please add a scaler") + + if default is None: + defaults = ['StandardScaler', 'Normalizer', 'MinMaxScaler', 'NoScaler'] + for default_ in defaults: + if default_ in available_scalers: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_scalers): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_scalers, + 
choice_hyperparameter.value_range)) + if len(dataset_properties['numerical_columns']) == 0: + assert len(choice_hyperparameter.value_range) == 1 + if 'NoScaler' not in choice_hyperparameter.value_range: + raise ValueError("Provided {} in choices, however, the dataset " + "is incompatible with it".format(choice_hyperparameter.value_range)) + + preprocessor = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + # add only no scaler to choice hyperparameters in case the dataset is only categorical + if len(dataset_properties['numerical_columns']) == 0: + default = 'NoScaler' + if include is not None and default not in include: + raise ValueError("Provided {} in include, however, " + "the dataset is incompatible with it".format(include)) + preprocessor = CSH.CategoricalHyperparameter('__choice__', + ['NoScaler'], + default_value=default) + else: + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_scalers.keys()), + default_value=default) + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of preprocessor choices + for name in preprocessor.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_scalers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore + **updates) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, config_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. 
+ Args: + dataset_properties: + + """ + super()._check_dataset_properties(dataset_properties) + assert 'numerical_columns' in dataset_properties.keys() and \ + 'categorical_columns' in dataset_properties.keys(), \ + "Dataset properties must contain information about the type of columns" diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py deleted file mode 100644 index 7c5f22fd5..000000000 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -from collections import OrderedDict -from typing import Any, Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler - -scaling_directory = os.path.split(__file__)[0] -_scalers = find_components(__package__, - scaling_directory, - BaseScaler) - -_addons = ThirdPartyComponents(BaseScaler) - - -def add_scaler(scaler: BaseScaler) -> None: - _addons.add_component(scaler) - - -class ScalerChoice(autoPyTorchChoice): - """ - Allows for dynamically choosing scaling component at runtime - """ - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available scaler components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all BaseScalers components available - as choices for scaling - """ - components = OrderedDict() - components.update(_scalers) - components.update(_addons.components) - return components - - def get_hyperparameter_search_space(self, - dataset_properties: Optional[Dict[str, Any]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None) -> ConfigurationSpace: - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = dict() - - dataset_properties = {**self.dataset_properties, **dataset_properties} - - available_scalers = self.get_available_components(dataset_properties=dataset_properties, - include=include, - exclude=exclude) - - if len(available_scalers) == 0: - raise ValueError("no scalers found, please add a scaler") - - if default is None: - defaults = ['StandardScaler', 'Normalizer', 'MinMaxScaler', 'NoScaler'] - for default_ in defaults: - if default_ in available_scalers: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_scalers): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_scalers, - choice_hyperparameter.value_range)) - if len(dataset_properties['numerical_columns']) == 0: - assert len(choice_hyperparameter.value_range) == 1 - if 'NoScaler' not in choice_hyperparameter.value_range: - raise ValueError("Provided {} in choices, however, the dataset " - "is incompatible with it".format(choice_hyperparameter.value_range)) - - preprocessor = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - 
default_value=choice_hyperparameter.default_value) - else: - # add only no scaler to choice hyperparameters in case the dataset is only categorical - if len(dataset_properties['numerical_columns']) == 0: - default = 'NoScaler' - if include is not None and default not in include: - raise ValueError("Provided {} in include, however, " - "the dataset is incompatible with it".format(include)) - preprocessor = CSH.CategoricalHyperparameter('__choice__', - ['NoScaler'], - default_value=default) - else: - preprocessor = CSH.CategoricalHyperparameter('__choice__', - list(available_scalers.keys()), - default_value=default) - cs.add_hyperparameter(preprocessor) - - # add only child hyperparameters of early_preprocessor choices - for name in preprocessor.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_scalers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore - **updates) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, config_space, - parent_hyperparameter=parent_hyperparameter) - - self.configuration_space = cs - self.dataset_properties = dataset_properties - return cs - - def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: - """ - A mechanism in code to ensure the correctness of the fit dictionary - It recursively makes sure that the children and parent level requirements - are honored before fit. - Args: - dataset_properties: - - """ - super()._check_dataset_properties(dataset_properties) - assert 'numerical_columns' in dataset_properties.keys() and \ - 'categorical_columns' in dataset_properties.keys(), \ - "Dataset properties must contain information about the type of columns" diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py index e69de29bb..9349ab642 100644 --- a/autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py @@ -0,0 +1,187 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + +directory = os.path.split(__file__)[0] +_schedulers = find_components(__package__, + directory, + BaseLRComponent) +_addons = ThirdPartyComponents(BaseLRComponent) + + +def add_scheduler(scheduler: BaseLRComponent) -> None: + _addons.add_component(scheduler) + + +class SchedulerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available scheduler components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all baseScheduler components available + as choices for learning rate scheduling + """ + components = OrderedDict() + components.update(_schedulers) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + 
include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of learning + rate schedulers + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == SchedulerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here for + # schedulers based on the dataset! + # TODO: Think if there is any case where a scheduler + # is not recommended for a certain dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default scheduler to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_schedulers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_schedulers) == 0: + raise ValueError("No scheduler found") + + if default is None: + defaults = [ + 'ReduceLROnPlateau', + 'CosineAnnealingLR', + 'no_LRScheduler', + 'LambdaLR', + 'StepLR', + 'ExponentialLR', + ] + for default_ in defaults: + if default_ in available_schedulers: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_schedulers): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_schedulers, + choice_hyperparameter.value_range)) + scheduler = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + scheduler = CSH.CategoricalHyperparameter( + '__choice__', + list(available_schedulers.keys()), + default_value=default + ) + cs.add_hyperparameter(scheduler) + for name in scheduler.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_schedulers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore + **updates) + parent_hyperparameter = {'parent': scheduler, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py deleted file mode 100644 index 9349ab642..000000000 --- a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py +++ /dev/null @@ -1,187 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent - -directory = os.path.split(__file__)[0] -_schedulers = find_components(__package__, - directory, - BaseLRComponent) -_addons = ThirdPartyComponents(BaseLRComponent) - - -def add_scheduler(scheduler: BaseLRComponent) -> None: - _addons.add_component(scheduler) - - -class SchedulerChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available scheduler components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all baseScheduler components available - as choices for 
learning rate scheduling - """ - components = OrderedDict() - components.update(_schedulers) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of learning - rate schedulers - - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == SchedulerChoice or hasattr(entry, 'get_components'): - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here for - # schedulers based on the dataset! - # TODO: Think if there is any case where a scheduler - # is not recommended for a certain dataset - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default scheduler to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. 
- exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_schedulers = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_schedulers) == 0: - raise ValueError("No scheduler found") - - if default is None: - defaults = [ - 'ReduceLROnPlateau', - 'CosineAnnealingLR', - 'no_LRScheduler', - 'LambdaLR', - 'StepLR', - 'ExponentialLR', - ] - for default_ in defaults: - if default_ in available_schedulers: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_schedulers): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_schedulers, - choice_hyperparameter.value_range)) - scheduler = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - scheduler = CSH.CategoricalHyperparameter( - '__choice__', - list(available_schedulers.keys()), - default_value=default - ) - cs.add_hyperparameter(scheduler) - for name in scheduler.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_schedulers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore - **updates) - parent_hyperparameter = {'parent': scheduler, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py index 81fd8e5f4..ef02f3c22 100644 --- a/autoPyTorch/pipeline/components/setup/network/base_network.py +++ b/autoPyTorch/pipeline/components/setup/network/base_network.py @@ -31,6 +31,7 @@ def __init__( FitRequirement("network_backbone", (torch.nn.Module,), user_defined=False, dataset_property=False), FitRequirement("network_embedding", (torch.nn.Module,), user_defined=False, dataset_property=False), ]) + self.network = network self.final_activation = None def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchTrainingComponent: diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/__init__.py b/autoPyTorch/pipeline/components/setup/network_backbone/__init__.py index e69de29bb..13793c393 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/__init__.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/__init__.py @@ -0,0 +1,196 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + 
ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone import ( + NetworkBackboneComponent, +) + +directory = os.path.split(__file__)[0] +_backbones = find_components(__package__, + directory, + NetworkBackboneComponent) +_addons = ThirdPartyComponents(NetworkBackboneComponent) + + +def add_backbone(backbone: NetworkBackboneComponent) -> None: + _addons.add_component(backbone) + + +class NetworkBackboneChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available backbone components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all basebackbone components available + as choices for learning rate scheduling + """ + components = OrderedDict() + components.update(_backbones) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of learning + rate backbones + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkBackboneChoice or hasattr(entry, 'get_components'): + continue + + task_type = dataset_properties['task_type'] + properties = entry.get_properties() + if 'tabular' in task_type and not properties['handles_tabular']: + continue + elif 'image' in task_type and not properties['handles_image']: + continue + elif 'time_series' in task_type and not properties['handles_time_series']: + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here for + # backbones based on the dataset! 
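To make the filtering step explicit: each component class reports, through `get_properties()`, which task families it supports (`handles_tabular`, `handles_image`, `handles_time_series`), and `get_available_components` drops any component whose flags do not match the dataset's `task_type`, as in the loop above. A small standalone sketch of that predicate, using made-up component entries rather than the real component classes:

from collections import OrderedDict

# Illustrative stand-ins for component classes and their get_properties() output
candidates = OrderedDict([
    ('ShapedMLPBackbone', {'handles_tabular': True, 'handles_image': False, 'handles_time_series': False}),
    ('ConvNetImageBackbone', {'handles_tabular': False, 'handles_image': True, 'handles_time_series': False}),
    ('InceptionTimeBackbone', {'handles_tabular': False, 'handles_image': False, 'handles_time_series': True}),
])


def filter_by_task_type(components, task_type):
    """Keep only components whose capability flags match the dataset's task type."""
    kept = OrderedDict()
    for name, properties in components.items():
        if 'tabular' in task_type and not properties['handles_tabular']:
            continue
        if 'image' in task_type and not properties['handles_image']:
            continue
        if 'time_series' in task_type and not properties['handles_time_series']:
            continue
        kept[name] = properties
    return kept


print(list(filter_by_task_type(candidates, 'tabular_classification')))   # ['ShapedMLPBackbone']
print(list(filter_by_task_type(candidates, 'image_classification')))     # ['ConvNetImageBackbone']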
+ # TODO: Think if there is any case where a backbone + # is not recommended for a certain dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default backbone to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_backbones = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_backbones) == 0: + raise ValueError("No backbone found") + + if default is None: + defaults = [ + 'ShapedMLPBackbone', + 'MLPBackbone', + 'ConvNetImageBackbone', + 'InceptionTimeBackbone', + ] + for default_ in defaults: + if default_ in available_backbones: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_backbones): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_backbones, + choice_hyperparameter.value_range)) + backbone = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + backbone = CSH.CategoricalHyperparameter( + '__choice__', + list(available_backbones.keys()), + default_value=default + ) + cs.add_hyperparameter(backbone) + for name in backbone.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_backbones[name].get_hyperparameter_search_space(dataset_properties, # type: ignore + **updates) + parent_hyperparameter = {'parent': backbone, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone_choice.py b/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone_choice.py deleted file mode 100644 index 13793c393..000000000 --- a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone_choice.py +++ /dev/null @@ -1,196 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from 
autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone import ( - NetworkBackboneComponent, -) - -directory = os.path.split(__file__)[0] -_backbones = find_components(__package__, - directory, - NetworkBackboneComponent) -_addons = ThirdPartyComponents(NetworkBackboneComponent) - - -def add_backbone(backbone: NetworkBackboneComponent) -> None: - _addons.add_component(backbone) - - -class NetworkBackboneChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available backbone components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all basebackbone components available - as choices for learning rate scheduling - """ - components = OrderedDict() - components.update(_backbones) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of learning - rate backbones - - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == NetworkBackboneChoice or hasattr(entry, 'get_components'): - continue - - task_type = dataset_properties['task_type'] - properties = entry.get_properties() - if 'tabular' in task_type and not properties['handles_tabular']: - continue - elif 'image' in task_type and not properties['handles_image']: - continue - elif 'time_series' in task_type and not properties['handles_time_series']: - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here for - # backbones based on the dataset! 
- # TODO: Think if there is any case where a backbone - # is not recommended for a certain dataset - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default backbone to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. - exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_backbones = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_backbones) == 0: - raise ValueError("No backbone found") - - if default is None: - defaults = [ - 'ShapedMLPBackbone', - 'MLPBackbone', - 'ConvNetImageBackbone', - 'InceptionTimeBackbone', - ] - for default_ in defaults: - if default_ in available_backbones: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_backbones): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_backbones, - choice_hyperparameter.value_range)) - backbone = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - backbone = CSH.CategoricalHyperparameter( - '__choice__', - list(available_backbones.keys()), - default_value=default - ) - cs.add_hyperparameter(backbone) - for name in backbone.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_backbones[name].get_hyperparameter_search_space(dataset_properties, # type: ignore - **updates) - parent_hyperparameter = {'parent': backbone, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/__init__.py b/autoPyTorch/pipeline/components/setup/network_embedding/__init__.py index e69de29bb..2d634c0bb 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/__init__.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/__init__.py @@ -0,0 +1,204 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from 
autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding import ( + NetworkEmbeddingComponent, +) + +directory = os.path.split(__file__)[0] +_embeddings = find_components(__package__, + directory, + NetworkEmbeddingComponent) +_addons = ThirdPartyComponents(NetworkEmbeddingComponent) + + +def add_embedding(embedding: NetworkEmbeddingComponent) -> None: + _addons.add_component(embedding) + + +class NetworkEmbeddingChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available embedding components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all NetworkEmbeddingComponents available + as choices for learning rate scheduling + """ + components = OrderedDict() + components.update(_embeddings) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of learning + rate embeddings + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkEmbeddingChoice or hasattr(entry, 'get_components'): + continue + + task_type = dataset_properties['task_type'] + properties = entry.get_properties() + if 'tabular' in task_type and not properties['handles_tabular']: + continue + elif 'image' in task_type and not properties['handles_image']: + continue + elif 'time_series' in task_type and not properties['handles_time_series']: + continue + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default embedding to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
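The method body that follows special-cases datasets without categorical features: when `dataset_properties['categorical_columns']` is empty there is nothing to embed, so the `__choice__` hyperparameter collapses to `NoEmbedding`; otherwise every available embedding is offered. A standalone sketch of that branch, assuming a `dataset_properties` dict shaped like the one used here (illustrative values only):

import ConfigSpace.hyperparameters as CSH
from ConfigSpace.configuration_space import ConfigurationSpace


def embedding_choice_space(dataset_properties, available_embedding, default='NoEmbedding'):
    """Mirror of the branch below: no categorical columns -> only NoEmbedding is offered."""
    cs = ConfigurationSpace()
    if len(dataset_properties['categorical_columns']) == 0:
        choices = ['NoEmbedding']
        default = 'NoEmbedding'
    else:
        choices = list(available_embedding)
    cs.add_hyperparameter(
        CSH.CategoricalHyperparameter('__choice__', choices, default_value=default))
    return cs


numerical_only = {'task_type': 'tabular_classification', 'categorical_columns': []}
mixed_types = {'task_type': 'tabular_classification', 'categorical_columns': [0, 3]}

print(embedding_choice_space(numerical_only, ['NoEmbedding', 'LearnedEntityEmbedding']))
print(embedding_choice_space(mixed_types, ['NoEmbedding', 'LearnedEntityEmbedding']))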
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_embedding = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_embedding) == 0 and 'tabular' in dataset_properties['task_type']: + raise ValueError("No embedding found") + + if available_embedding == 0: + return cs + + if default is None: + defaults = [ + 'NoEmbedding', + 'LearnedEntityEmbedding', + ] + for default_ in defaults: + if default_ in available_embedding: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_embedding): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_embedding, + choice_hyperparameter.value_range)) + if len(dataset_properties['categorical_columns']) == 0: + assert len(choice_hyperparameter.value_range) == 1 + if 'NoEmbedding' not in choice_hyperparameter.value_range: + raise ValueError("Provided {} in choices, however, the dataset " + "is incompatible with it".format(choice_hyperparameter.value_range)) + embedding = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + if len(dataset_properties['categorical_columns']) == 0: + default = 'NoEmbedding' + if include is not None and default not in include: + raise ValueError("Provided {} in include, however, the dataset " + "is incompatible with it".format(include)) + embedding = CSH.CategoricalHyperparameter('__choice__', + ['NoEmbedding'], + default_value=default) + else: + embedding = CSH.CategoricalHyperparameter('__choice__', + list(available_embedding.keys()), + default_value=default) + + cs.add_hyperparameter(embedding) + for name in embedding.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_embedding[name].get_hyperparameter_search_space(dataset_properties, # type: ignore + **updates) + parent_hyperparameter = {'parent': embedding, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding_choice.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding_choice.py deleted file mode 100644 index 14a5c93d9..000000000 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding_choice.py +++ /dev/null @@ -1,204 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from 
autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding import ( - NetworkEmbeddingComponent, -) - -directory = os.path.split(__file__)[0] -_embeddings = find_components(__package__, - directory, - NetworkEmbeddingComponent) -_addons = ThirdPartyComponents(NetworkEmbeddingComponent) - - -def add_embedding(embedding: NetworkEmbeddingComponent) -> None: - _addons.add_component(embedding) - - -class NetworkEmbeddingChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available embedding components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all baseembedding components available - as choices for learning rate scheduling - """ - components = OrderedDict() - components.update(_embeddings) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of learning - rate embeddings - - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == NetworkEmbeddingChoice or hasattr(entry, 'get_components'): - continue - - task_type = dataset_properties['task_type'] - properties = entry.get_properties() - if 'tabular' in task_type and not properties['handles_tabular']: - continue - elif 'image' in task_type and not properties['handles_image']: - continue - elif 'time_series' in task_type and not properties['handles_time_series']: - continue - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default embedding to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. 
- exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_embedding = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_embedding) == 0 and 'tabular' in dataset_properties['task_type']: - raise ValueError("No embedding found") - - if available_embedding == 0: - return cs - - if default is None: - defaults = [ - 'NoEmbedding', - 'LearnedEntityEmbedding', - ] - for default_ in defaults: - if default_ in available_embedding: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_embedding): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_embedding, - choice_hyperparameter.value_range)) - if len(dataset_properties['categorical_columns']) == 0: - assert len(choice_hyperparameter.value_range) == 1 - if 'NoEmbedding' not in choice_hyperparameter.value_range: - raise ValueError("Provided {} in choices, however, the dataset " - "is incompatible with it".format(choice_hyperparameter.value_range)) - embedding = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - if len(dataset_properties['categorical_columns']) == 0: - default = 'NoEmbedding' - if include is not None and default not in include: - raise ValueError("Provided {} in include, however, the dataset " - "is incompatible with it".format(include)) - embedding = CSH.CategoricalHyperparameter('__choice__', - ['NoEmbedding'], - default_value=default) - else: - embedding = CSH.CategoricalHyperparameter('__choice__', - list(available_embedding.keys()), - default_value=default) - - cs.add_hyperparameter(embedding) - for name in embedding.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_embedding[name].get_hyperparameter_search_space(dataset_properties, # type: ignore - **updates) - parent_hyperparameter = {'parent': embedding, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_head/__init__.py b/autoPyTorch/pipeline/components/setup/network_head/__init__.py index e69de29bb..346f5fba3 100644 --- a/autoPyTorch/pipeline/components/setup/network_head/__init__.py +++ b/autoPyTorch/pipeline/components/setup/network_head/__init__.py @@ -0,0 +1,193 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + 
autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network_head.base_network_head import ( + NetworkHeadComponent, +) + +directory = os.path.split(__file__)[0] +_heads = find_components(__package__, + directory, + NetworkHeadComponent) +_addons = ThirdPartyComponents(NetworkHeadComponent) + + +def add_head(head: NetworkHeadComponent) -> None: + _addons.add_component(head) + + +class NetworkHeadChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available head components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all NetworkHeadComponents available + as choices for learning rate scheduling + """ + components = OrderedDict() + components.update(_heads) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of learning + rate heads + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkHeadChoice or hasattr(entry, 'get_components'): + continue + + task_type = dataset_properties['task_type'] + properties = entry.get_properties() + if 'tabular' in task_type and not properties['handles_tabular']: + continue + elif 'image' in task_type and not properties['handles_image']: + continue + elif 'time_series' in task_type and not properties['handles_time_series']: + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here for + # heads based on the dataset! + # TODO: Think if there is any case where a head + # is not recommended for a certain dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default head to use + include: Optional[Dict[str, Any]]: what components to include. 
It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_heads = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_heads) == 0: + raise ValueError("No head found") + + if default is None: + defaults = [ + 'FullyConnectedHead', + 'FullyConvolutional2DHead', + ] + for default_ in defaults: + if default_ in available_heads: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_heads): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_heads, + choice_hyperparameter.value_range)) + head = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + head = CSH.CategoricalHyperparameter( + '__choice__', + list(available_heads.keys()), + default_value=default) + cs.add_hyperparameter(head) + for name in head.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_heads[name].get_hyperparameter_search_space(dataset_properties, # type: ignore + **updates) + parent_hyperparameter = {'parent': head, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_head/base_network_head_choice.py b/autoPyTorch/pipeline/components/setup/network_head/base_network_head_choice.py deleted file mode 100644 index c03e860fc..000000000 --- a/autoPyTorch/pipeline/components/setup/network_head/base_network_head_choice.py +++ /dev/null @@ -1,193 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.network_head.base_network_head import ( - NetworkHeadComponent, -) - -directory = os.path.split(__file__)[0] -_heads = find_components(__package__, - directory, - NetworkHeadComponent) -_addons = ThirdPartyComponents(NetworkHeadComponent) - - -def add_head(head: NetworkHeadComponent) -> None: - _addons.add_component(head) - - -class NetworkHeadChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available head components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all basehead components available - as choices for learning rate scheduling - """ - components = 
OrderedDict() - components.update(_heads) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of learning - rate heads - - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == NetworkHeadChoice or hasattr(entry, 'get_components'): - continue - - task_type = dataset_properties['task_type'] - properties = entry.get_properties() - if 'tabular' in task_type and not properties['handles_tabular']: - continue - elif 'image' in task_type and not properties['handles_image']: - continue - elif 'time_series' in task_type and not properties['handles_time_series']: - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here for - # heads based on the dataset! - # TODO: Think if there is any case where a head - # is not recommended for a certain dataset - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default head to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. 
- exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_heads = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_heads) == 0: - raise ValueError("No head found") - - if default is None: - defaults = [ - 'FullyConnectedHead', - 'FullyConvolutional2DHead', - ] - for default_ in defaults: - if default_ in available_heads: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_heads): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_heads, - choice_hyperparameter.value_range)) - head = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - head = CSH.CategoricalHyperparameter( - '__choice__', - list(available_heads.keys()), - default_value=default) - cs.add_hyperparameter(head) - for name in head.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_heads[name].get_hyperparameter_search_space(dataset_properties, # type: ignore - **updates) - parent_hyperparameter = {'parent': head, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/__init__.py b/autoPyTorch/pipeline/components/setup/network_initializer/__init__.py index e69de29bb..f75f00c65 100644 --- a/autoPyTorch/pipeline/components/setup/network_initializer/__init__.py +++ b/autoPyTorch/pipeline/components/setup/network_initializer/__init__.py @@ -0,0 +1,179 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + +directory = os.path.split(__file__)[0] +_initializers = find_components(__package__, + directory, + BaseNetworkInitializerComponent) +_addons = ThirdPartyComponents(BaseNetworkInitializerComponent) + + +def add_network_initializer(initializer: BaseNetworkInitializerComponent) -> None: + _addons.add_component(initializer) + + +class NetworkInitializerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available initializer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseNetworkInitializerComponents 
available + as choices + """ + components = OrderedDict() + components.update(_initializers) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of initializer + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkInitializerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
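The `include`/`exclude` arguments behave the same way here as in the other choice modules: `include` is an exhaustive whitelist (unknown names raise a `ValueError`), `exclude` is a blacklist, and passing both together is rejected. A self-contained sketch of that selection logic, with placeholder component names:

from collections import OrderedDict


def select_components(available, include=None, exclude=None):
    """Apply the include/exclude rules used by get_available_components."""
    if include is not None and exclude is not None:
        raise ValueError("The argument include and exclude cannot be used together.")
    if include is not None:
        for name in include:
            if name not in available:
                raise ValueError("Trying to include unknown component: %s" % name)
    selected = OrderedDict()
    for name, component in available.items():
        if include is not None and name not in include:
            continue
        if exclude is not None and name in exclude:
            continue
        selected[name] = component
    return selected


initializers = OrderedDict([('XavierInit', object()), ('KaimingInit', object())])
print(list(select_components(initializers, include=['XavierInit'])))   # ['XavierInit']
print(list(select_components(initializers, exclude=['XavierInit'])))   # ['KaimingInit']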
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + initializers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(initializers) == 0: + raise ValueError("No initializers found") + + if default is None: + defaults = ['XavierInit', + ] + for default_ in defaults: + if default_ in initializers: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(initializers): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + initializers, + choice_hyperparameter.value_range)) + initializer = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + initializer = CSH.CategoricalHyperparameter( + '__choice__', + list(initializers.keys()), + default_value=default + ) + cs.add_hyperparameter(initializer) + for name in initializer.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = initializers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore + **updates) + parent_hyperparameter = {'parent': initializer, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py b/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py deleted file mode 100644 index cc7dfcfc6..000000000 --- a/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py +++ /dev/null @@ -1,179 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( - BaseNetworkInitializerComponent -) - -directory = os.path.split(__file__)[0] -_initializers = find_components(__package__, - directory, - BaseNetworkInitializerComponent) -_addons = ThirdPartyComponents(BaseNetworkInitializerComponent) - - -def add_network_initializer(initializer: BaseNetworkInitializerComponent) -> None: - _addons.add_component(initializer) - - -class NetworkInitializerChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available initializer components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all BaseInitializerComponent components available - as 
choices - """ - components = OrderedDict() - components.update(_initializers) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of initializer - components - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == NetworkInitializerChoice or hasattr(entry, 'get_components'): - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here based on dataset - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default component to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. 
- exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - initializers = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(initializers) == 0: - raise ValueError("No initializers found") - - if default is None: - defaults = ['XavierInit', - ] - for default_ in defaults: - if default_ in initializers: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(initializers): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - initializers, - choice_hyperparameter.value_range)) - initializer = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - initializer = CSH.CategoricalHyperparameter( - '__choice__', - list(initializers.keys()), - default_value=default - ) - cs.add_hyperparameter(initializer) - for name in initializer.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = initializers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore - **updates) - parent_hyperparameter = {'parent': initializer, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/optimizer/__init__.py b/autoPyTorch/pipeline/components/setup/optimizer/__init__.py index e69de29bb..93f61e74b 100644 --- a/autoPyTorch/pipeline/components/setup/optimizer/__init__.py +++ b/autoPyTorch/pipeline/components/setup/optimizer/__init__.py @@ -0,0 +1,181 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + +directory = os.path.split(__file__)[0] +_optimizers = find_components(__package__, + directory, + BaseOptimizerComponent) +_addons = ThirdPartyComponents(BaseOptimizerComponent) + + +def add_optimizer(optimizer: BaseOptimizerComponent) -> None: + _addons.add_component(optimizer) + + +class OptimizerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available optimizer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseOptimizerComponents available + as choices + """ + components = OrderedDict() + components.update(_optimizers) + components.update(_addons.components) + return components + + def 
get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of Optimizer + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == OptimizerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
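Two small helpers capture what the method below does before constructing the `__choice__` hyperparameter: a user-supplied update for `__choice__` must only request optimizers that survived filtering, and when no default is given, the first available entry of a preferred ordering is used. A standalone sketch of both steps (the preferred ordering mirrors the list used below; everything else is illustrative):

def validate_choice_update(value_range, available, owner='OptimizerChoice'):
    """Reject a '__choice__' update that requests unavailable components."""
    if not set(value_range).issubset(available):
        raise ValueError("Expected given update for {} to have choices in {} got {}".format(
            owner, sorted(available), value_range))
    return list(value_range)


def pick_default(preferences, available):
    """Return the first preferred component that is actually available, if any."""
    for candidate in preferences:
        if candidate in available:
            return candidate
    return None


available_optimizer = {'AdamOptimizer', 'AdamWOptimizer', 'SGDOptimizer', 'RMSpropOptimizer'}
print(validate_choice_update(['AdamOptimizer', 'SGDOptimizer'], available_optimizer))
print(pick_default(['AdamOptimizer', 'AdamWOptimizer', 'SGDOptimizer', 'RMSpropOptimizer'],
                   {'SGDOptimizer', 'RMSpropOptimizer'}))  # -> 'SGDOptimizer'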
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_optimizer = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_optimizer) == 0: + raise ValueError("No Optimizer found") + + if default is None: + defaults = [ + 'AdamOptimizer', + 'AdamWOptimizer', + 'SGDOptimizer', + 'RMSpropOptimizer' + ] + for default_ in defaults: + if default_ in available_optimizer: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_optimizer): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_optimizer, + choice_hyperparameter.value_range)) + optimizer = CSH.CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + optimizer = CSH.CategoricalHyperparameter( + '__choice__', + list(available_optimizer.keys()), + default_value=default + ) + cs.add_hyperparameter(optimizer) + for name in optimizer.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_optimizer[name].get_hyperparameter_search_space(dataset_properties, # type: ignore + **updates) + parent_hyperparameter = {'parent': optimizer, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py b/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py deleted file mode 100644 index 93f61e74b..000000000 --- a/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py +++ /dev/null @@ -1,181 +0,0 @@ -import os -from collections import OrderedDict -from typing import Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent - -directory = os.path.split(__file__)[0] -_optimizers = find_components(__package__, - directory, - BaseOptimizerComponent) -_addons = ThirdPartyComponents(BaseOptimizerComponent) - - -def add_optimizer(optimizer: BaseOptimizerComponent) -> None: - _addons.add_component(optimizer) - - -class OptimizerChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available optimizer components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all BaseOptimizerComponents available - as choices - """ - components = OrderedDict() 
- components.update(_optimizers) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of Optimizer - components - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == OptimizerChoice or hasattr(entry, 'get_components'): - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here based on dataset - - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default component to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. 
- exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_optimizer = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_optimizer) == 0: - raise ValueError("No Optimizer found") - - if default is None: - defaults = [ - 'AdamOptimizer', - 'AdamWOptimizer', - 'SGDOptimizer', - 'RMSpropOptimizer' - ] - for default_ in defaults: - if default_ in available_optimizer: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_optimizer): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_optimizer, - choice_hyperparameter.value_range)) - optimizer = CSH.CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - optimizer = CSH.CategoricalHyperparameter( - '__choice__', - list(available_optimizer.keys()), - default_value=default - ) - cs.add_hyperparameter(optimizer) - for name in optimizer.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_optimizer[name].get_hyperparameter_search_space(dataset_properties, # type: ignore - **updates) - parent_hyperparameter = {'parent': optimizer, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py b/autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py index e69de29bb..3512fa6ce 100644 --- a/autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py @@ -0,0 +1,162 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.traditional_ml.base_model import BaseModelComponent + + +directory = os.path.split(__file__)[0] +_models = find_components(__package__, + directory, + BaseModelComponent) +_addons = ThirdPartyComponents(BaseModelComponent) + + +def add_model(model: BaseModelComponent) -> None: + _addons.add_component(model) + + +class ModelChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available model components + Args: + None + Returns: + Dict[str, autoPyTorchComponent]: all baseNetwork components available + as choices + """ + components = OrderedDict() + 
components.update(_models) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of Network + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == ModelChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_models = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_models) == 0: + raise ValueError("No Network found") + + if default is None: + defaults: List[Any] = [] + for default_ in defaults: + if default_ in available_models: + default = default_ + break + + model = CSH.CategoricalHyperparameter( + '__choice__', + list(available_models.keys()), + default_value=default + ) + cs.add_hyperparameter(model) + for name in available_models: + model_configuration_space = available_models[name]. 
\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': model, 'value': name} + cs.add_configuration_space( + name, + model_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call predict before the object is initialized" + return self.choice.predict_proba(X) diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py b/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py deleted file mode 100644 index 3512fa6ce..000000000 --- a/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -from collections import OrderedDict -from typing import Any, Dict, List, Optional - -import ConfigSpace.hyperparameters as CSH -from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.setup.traditional_ml.base_model import BaseModelComponent - - -directory = os.path.split(__file__)[0] -_models = find_components(__package__, - directory, - BaseModelComponent) -_addons = ThirdPartyComponents(BaseModelComponent) - - -def add_model(model: BaseModelComponent) -> None: - _addons.add_component(model) - - -class ModelChoice(autoPyTorchChoice): - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available model components - Args: - None - Returns: - Dict[str, autoPyTorchComponent]: all baseNetwork components available - as choices - """ - components = OrderedDict() - components.update(_models) - components.update(_addons.components) - return components - - def get_available_components( - self, - dataset_properties: Optional[Dict[str, str]] = None, - include: List[str] = None, - exclude: List[str] = None, - ) -> Dict[str, autoPyTorchComponent]: - """Filters out components based on user provided - include/exclude directives, as well as the dataset properties - Args: - include (Optional[Dict[str, Any]]): what hyper-parameter configurations - to honor when creating the configuration space - exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations - to remove from the configuration space - dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics - of the dataset to guide the pipeline choices of components - Returns: - Dict[str, autoPyTorchComponent]: A filtered dict of Network - components - """ - if dataset_properties is None: - dataset_properties = {} - - if include is not None and exclude is not None: - raise ValueError( - "The argument include and exclude cannot be used together.") - - available_comp = self.get_components() - - if include is not None: - for incl in include: - if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) - - components_dict = OrderedDict() - for name in available_comp: - if include is not None and name not in include: - continue - elif exclude is not None and name in exclude: - 
continue - - entry = available_comp[name] - - # Exclude itself to avoid infinite loop - if entry == ModelChoice or hasattr(entry, 'get_components'): - continue - - # target_type = dataset_properties['target_type'] - # Apply some automatic filtering here based on dataset - components_dict[name] = entry - - return components_dict - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on - default (Optional[str]): Default component to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. - exclude: Optional[Dict[str, Any]]: which components to skip - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - # Compile a list of legal preprocessors for this problem - available_models = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_models) == 0: - raise ValueError("No Network found") - - if default is None: - defaults: List[Any] = [] - for default_ in defaults: - if default_ in available_models: - default = default_ - break - - model = CSH.CategoricalHyperparameter( - '__choice__', - list(available_models.keys()), - default_value=default - ) - cs.add_hyperparameter(model) - for name in available_models: - model_configuration_space = available_models[name]. 
\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': model, 'value': name} - cs.add_configuration_space( - name, - model_configuration_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call transform before the object is initialized" - return self.choice.transform(X) - - def predict_proba(self, X: np.ndarray) -> np.ndarray: - assert self.choice is not None, "Cannot call predict before the object is initialized" - return self.choice.predict_proba(X) diff --git a/autoPyTorch/pipeline/components/training/trainer/__init__.py b/autoPyTorch/pipeline/components/training/trainer/__init__.py index e69de29bb..248d8085b 100755 --- a/autoPyTorch/pipeline/components/training/trainer/__init__.py +++ b/autoPyTorch/pipeline/components/training/trainer/__init__.py @@ -0,0 +1,508 @@ +import collections +import logging.handlers +import os +import tempfile +import time +from typing import Any, Dict, List, Optional, Tuple, cast + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, +) + +import numpy as np + +import torch +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.tensorboard.writer import SummaryWriter + +from autoPyTorch.constants import STRING_TO_TASK_TYPES +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.training.losses import get_loss +from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics +from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( + BaseTrainerComponent, + BudgetTracker, + RunSummary, +) +from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary +from autoPyTorch.utils.logging_ import get_named_client_logger + +trainer_directory = os.path.split(__file__)[0] +_trainers = find_components(__package__, + trainer_directory, + BaseTrainerComponent) +_addons = ThirdPartyComponents(BaseTrainerComponent) + + +def add_trainer(trainer: BaseTrainerComponent) -> None: + _addons.add_component(trainer) + + +class TrainerChoice(autoPyTorchChoice): + """This class is an interface to the PyTorch trainer. + + + To map to pipeline terminology, a choice component will implement the epoch + loop through fit, whereas the component who is chosen will dictate how a single + epoch happens, that is, how batches of data are fed and used to train the network. 
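Each of these choice classes (OptimizerChoice, ModelChoice, TrainerChoice) assembles its search space the same way: a categorical '__choice__' hyperparameter selects the component, and each component's own hyperparameters are attached as a conditional child space. Below is a minimal, self-contained sketch of that ConfigSpace pattern; the component names are an illustrative subset and the single 'lr' hyperparameter merely stands in for the real per-component subspaces.

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter

# Sketch of the choice-component pattern: '__choice__' selects the component,
# and each component's hyperparameters live in a conditional child space.
cs = ConfigurationSpace()
choice = CategoricalHyperparameter('__choice__',
                                   ['AdamOptimizer', 'SGDOptimizer'],  # illustrative subset
                                   default_value='AdamOptimizer')
cs.add_hyperparameter(choice)

for name in choice.choices:
    child = ConfigurationSpace()
    # Hypothetical hyperparameter standing in for the component's real subspace
    child.add_hyperparameter(UniformFloatHyperparameter('lr', 1e-5, 1e-1, log=True))
    # The child space is only active when '__choice__' equals this component
    cs.add_configuration_space(name, child,
                               parent_hyperparameter={'parent': choice, 'value': name})

# Sampling yields '__choice__' plus only the selected component's
# hyperparameters, e.g. 'AdamOptimizer:lr'
print(cs.sample_configuration())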
+ + """ + + def __init__(self, + dataset_properties: Dict[str, Any], + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__(dataset_properties=dataset_properties, + random_state=random_state) + self.run_summary = None # type: Optional[RunSummary] + self.writer = None # type: Optional[SummaryWriter] + self._fit_requirements: Optional[List[FitRequirement]] = [ + FitRequirement("lr_scheduler", (_LRScheduler,), user_defined=False, dataset_property=False), + FitRequirement("num_run", (int,), user_defined=False, dataset_property=False), + FitRequirement( + "optimizer", (Optimizer,), user_defined=False, dataset_property=False), + FitRequirement("train_data_loader", + (torch.utils.data.DataLoader,), + user_defined=False, dataset_property=False), + FitRequirement("val_data_loader", + (torch.utils.data.DataLoader,), + user_defined=False, dataset_property=False)] + self.checkpoint_dir = None # type: Optional[str] + + def get_fit_requirements(self) -> Optional[List[FitRequirement]]: + return self._fit_requirements + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available trainer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all components available + as choices for learning rate scheduling + """ + components = collections.OrderedDict() # type: Dict[str, autoPyTorchComponent] + components.update(_trainers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default scheduler to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + # Compile a list of legal trainers for this problem + available_trainers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_trainers) == 0: + raise ValueError("No trainer found") + + if default is None: + defaults = ['StandardTrainer', + ] + for default_ in defaults: + if default_ in available_trainers: + default = default_ + break + updates = self._get_search_space_updates() + if '__choice__' in updates.keys(): + choice_hyperparameter = updates['__choice__'] + if not set(choice_hyperparameter.value_range).issubset(available_trainers): + raise ValueError("Expected given update for {} to have " + "choices in {} got {}".format(self.__class__.__name__, + available_trainers, + choice_hyperparameter.value_range)) + trainer = CategoricalHyperparameter('__choice__', + choice_hyperparameter.value_range, + default_value=choice_hyperparameter.default_value) + else: + trainer = CategoricalHyperparameter( + '__choice__', + list(available_trainers.keys()), + default_value=default + ) + cs.add_hyperparameter(trainer) + for name in trainer.choices: + updates = self._get_search_space_updates(prefix=name) + config_space = available_trainers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore + **updates) + parent_hyperparameter = {'parent': trainer, 'value': name} + cs.add_configuration_space( + name, + config_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """The transform function calls the transform function of the + underlying model and returns the transformed array. + + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + X.update({'run_summary': self.run_summary}) + return X + + def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + # Make sure that the prerequisites are there + self.check_requirements(X, y) + + # Setup the logger + self.logger = get_named_client_logger( + name=f"{X['num_run']}_{time.time()}", + # Log to a user provided port else to the default logging port + port=X['logger_port' + ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, + ) + + # Call the actual fit function. + self._fit( + X=X, + y=y, + **kwargs + ) + + return cast(autoPyTorchComponent, self.choice) + + def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Comply with mypy + # Notice that choice here stands for the component choice framework, + # where we dynamically build the configuration space by selecting the available + # component choices. In this case, is what trainer choices are available + assert self.choice is not None + + # Setup a Logger and other logging support + # Writer is not pickable -- make sure it is not saved in self + writer = None + if 'use_tensorboard_logger' in X and X['use_tensorboard_logger']: + writer = SummaryWriter(log_dir=X['backend'].temporary_directory) + + if X["torch_num_threads"] > 0: + torch.set_num_threads(X["torch_num_threads"]) + + budget_tracker = BudgetTracker( + budget_type=X['budget_type'], + max_runtime=X['runtime'] if 'runtime' in X else None, + max_epochs=X['epochs'] if 'epochs' in X else None, + ) + + # Support additional user metrics + additional_metrics = X['additional_metrics'] if 'additional_metrics' in X else None + additional_losses = X['additional_losses'] if 'additional_losses' in X else None + self.choice.prepare( + model=X['network'], + metrics=get_metrics(dataset_properties=X['dataset_properties'], + names=additional_metrics), + criterion=get_loss(X['dataset_properties'], + name=additional_losses), + budget_tracker=budget_tracker, + optimizer=X['optimizer'], + device=get_device_from_fit_dictionary(X), + metrics_during_training=X['metrics_during_training'], + scheduler=X['lr_scheduler'], + task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']], + labels=X['y_train'][X['backend'].load_datamanager().splits[X['split_id']][0]] + ) + total_parameter_count, trainable_parameter_count = self.count_parameters(X['network']) + self.run_summary = RunSummary( + total_parameter_count, + trainable_parameter_count, + ) + + epoch = 1 + + while True: + + # prepare epoch + start_time = time.time() + + self.choice.on_epoch_start(X=X, epoch=epoch) + + # training + train_loss, train_metrics = self.choice.train_epoch( + train_loader=X['train_data_loader'], + epoch=epoch, + writer=writer, + ) + + val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {} + if self.eval_valid_each_epoch(X): + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) + if 'test_data_loader' in X and X['test_data_loader']: + test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer) + + # Save training information + self.run_summary.add_performance( + epoch=epoch, + start_time=start_time, + end_time=time.time(), + train_loss=train_loss, + val_loss=val_loss, + test_loss=test_loss, + train_metrics=train_metrics, + val_metrics=val_metrics, + test_metrics=test_metrics, + ) + + # Save the weights of the best model and, if patience + # exhausted break training + if self.early_stop_handler(X): + break + + if self.choice.on_epoch_end(X=X, epoch=epoch): + break + + self.logger.debug(self.run_summary.repr_last_epoch()) + + # Reached max epoch on next iter, don't even go there + if budget_tracker.is_max_epoch_reached(epoch + 1): + break + + epoch += 1 + + if 'cuda' in X['device']: + torch.cuda.empty_cache() + + # wrap up -- add score if not evaluating every epoch + if not self.eval_valid_each_epoch(X): + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader']) + if 'test_data_loader' in X and X['val_data_loader']: + test_loss, test_metrics = self.choice.evaluate(X['test_data_loader']) + self.run_summary.add_performance( + epoch=epoch, + start_time=start_time, + end_time=time.time(), + 
train_loss=train_loss, + val_loss=val_loss, + test_loss=test_loss, + train_metrics=train_metrics, + val_metrics=val_metrics, + test_metrics=test_metrics, + ) + self.save_model_for_ensemble() + + self.logger.info(f"Finished training with {self.run_summary.repr_last_epoch()}") + + # Tag as fitted + self.fitted_ = True + + return self + + def early_stop_handler(self, X: Dict[str, Any]) -> bool: + """ + If early stopping is enabled, this procedure stops the training after a + given patience + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + + Returns: + bool: If true, training should be stopped + """ + assert self.run_summary is not None + + # Allow to disable early stopping + if X['early_stopping'] is None or X['early_stopping'] < 0: + return False + + # Store the best weights seen so far: + if self.checkpoint_dir is None: + self.checkpoint_dir = tempfile.mkdtemp(dir=X['backend'].temporary_directory) + + epochs_since_best = self.run_summary.get_last_epoch() - self.run_summary.get_best_epoch() + + # Save the checkpoint if there is a new best epoch + best_path = os.path.join(self.checkpoint_dir, 'best.pth') + if epochs_since_best == 0: + torch.save(X['network'].state_dict(), best_path) + + if epochs_since_best > X['early_stopping']: + self.logger.debug(f" Early stopped model {X['num_run']} on epoch {self.run_summary.get_best_epoch()}") + # We will stop the training. Load the last best performing weights + X['network'].load_state_dict(torch.load(best_path)) + + # Let the tempfile module clean the temp dir + self.checkpoint_dir = None + return True + + return False + + def eval_valid_each_epoch(self, X: Dict[str, Any]) -> bool: + """ + Returns true if we are supposed to evaluate the model on every epoch, + on the validation data. Usually, we only validate the data at the end, + but in the case of early stopping, is appealing to evaluate each epoch. + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + + Returns: + bool: if True, the model is evaluated in every epoch + + """ + if 'early_stopping' in X and X['early_stopping']: + return True + + # We need to know if we should reduce the rate based on val loss + if 'ReduceLROnPlateau' in X['lr_scheduler'].__class__.__name__: + return True + + return False + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. 
It is a message passing + mechanism, in which during a transform, a component adds relevant information + so that further stages can be properly fitted + """ + + # make sure the parent requirements are honored + super().check_requirements(X, y) + + # We need a working dir in which to put our data + if 'backend' not in X: + raise ValueError('Need a backend to provide the working directory, ' + "yet 'backend' was not found in the fit dictionary") + + # Whether we should evaluate metrics during training or not + if 'metrics_during_training' not in X: + raise ValueError('Missing metrics_during_training in the fit dictionary') + + # Setup Components + if 'lr_scheduler' not in X: + raise ValueError("Learning rate scheduler not found in the fit dictionary!") + + if 'network' not in X: + raise ValueError("Network not found in the fit dictionary!") + + if 'optimizer' not in X: + raise ValueError("Optimizer not found in the fit dictionary!") + + # Training Components + if 'train_data_loader' not in X: + raise ValueError("train_data_loader not found in the fit dictionary!") + + if 'val_data_loader' not in X: + raise ValueError("val_data_loader not found in the fit dictionary!") + + if 'budget_type' not in X: + raise ValueError("Budget type not found in the fit dictionary!") + else: + if 'epochs' not in X or 'runtime' not in X or 'epoch_or_time' not in X: + if X['budget_type'] in ['epochs', 'epoch_or_time'] and 'epochs' not in X: + raise ValueError("Budget type is epochs but " + "epochs was not found in the fit dictionary!") + elif X['budget_type'] in ['runtime', 'epoch_or_time'] and 'runtime' not in X: + raise ValueError("Budget type is runtime but " + "no maximum number of seconds was provided!") + else: + raise ValueError("Unsupported budget type provided: {}".format( + X['budget_type'] + )) + + if 'num_run' not in X: + raise ValueError('To fit a trainer, expected fit dictionary to have a num_run') + + for config_option in ["torch_num_threads", 'device']: + if config_option not in X: + raise ValueError("To fit a trainer, expected fit dictionary to have a {}".format( + config_option + )) + + # For early stopping, we need to know the patience + if 'early_stopping' not in X: + raise ValueError('To fit a Trainer, expected fit dictionary to have early_stopping') + + @staticmethod + def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: + """ + A method to get the total/trainable parameter count from the model + + Args: + model (torch.nn.Module): the module from which to count parameters + + Returns: + total_parameter_count: the total number of parameters of the model + trainable_parameter_count: only the parameters being optimized + """ + total_parameter_count = sum( + p.numel() for p in model.parameters()) + trainable_parameter_count = sum( + p.numel() for p in model.parameters() if p.requires_grad) + return total_parameter_count, trainable_parameter_count + + def save_model_for_ensemble(self) -> str: + raise NotImplementedError() + + def __str__(self) -> str: + """ Allow a nice understanding of what components were used """ + string = str(self.run_summary) + return string diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py deleted file mode 100755 index 248d8085b..000000000 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ /dev/null @@ -1,508 +0,0 @@ -import collections -import logging.handlers -import os -import tempfile -import time -from typing 
import Any, Dict, List, Optional, Tuple, cast - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import ( - CategoricalHyperparameter, -) - -import numpy as np - -import torch -from torch.optim import Optimizer -from torch.optim.lr_scheduler import _LRScheduler -from torch.utils.tensorboard.writer import SummaryWriter - -from autoPyTorch.constants import STRING_TO_TASK_TYPES -from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.base_component import ( - ThirdPartyComponents, - autoPyTorchComponent, - find_components, -) -from autoPyTorch.pipeline.components.training.losses import get_loss -from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics -from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( - BaseTrainerComponent, - BudgetTracker, - RunSummary, -) -from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary -from autoPyTorch.utils.logging_ import get_named_client_logger - -trainer_directory = os.path.split(__file__)[0] -_trainers = find_components(__package__, - trainer_directory, - BaseTrainerComponent) -_addons = ThirdPartyComponents(BaseTrainerComponent) - - -def add_trainer(trainer: BaseTrainerComponent) -> None: - _addons.add_component(trainer) - - -class TrainerChoice(autoPyTorchChoice): - """This class is an interface to the PyTorch trainer. - - - To map to pipeline terminology, a choice component will implement the epoch - loop through fit, whereas the component who is chosen will dictate how a single - epoch happens, that is, how batches of data are fed and used to train the network. - - """ - - def __init__(self, - dataset_properties: Dict[str, Any], - random_state: Optional[np.random.RandomState] = None - ): - - super().__init__(dataset_properties=dataset_properties, - random_state=random_state) - self.run_summary = None # type: Optional[RunSummary] - self.writer = None # type: Optional[SummaryWriter] - self._fit_requirements: Optional[List[FitRequirement]] = [ - FitRequirement("lr_scheduler", (_LRScheduler,), user_defined=False, dataset_property=False), - FitRequirement("num_run", (int,), user_defined=False, dataset_property=False), - FitRequirement( - "optimizer", (Optimizer,), user_defined=False, dataset_property=False), - FitRequirement("train_data_loader", - (torch.utils.data.DataLoader,), - user_defined=False, dataset_property=False), - FitRequirement("val_data_loader", - (torch.utils.data.DataLoader,), - user_defined=False, dataset_property=False)] - self.checkpoint_dir = None # type: Optional[str] - - def get_fit_requirements(self) -> Optional[List[FitRequirement]]: - return self._fit_requirements - - def get_components(self) -> Dict[str, autoPyTorchComponent]: - """Returns the available trainer components - - Args: - None - - Returns: - Dict[str, autoPyTorchComponent]: all components available - as choices for learning rate scheduling - """ - components = collections.OrderedDict() # type: Dict[str, autoPyTorchComponent] - components.update(_trainers) - components.update(_addons.components) - return components - - def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, - ) -> ConfigurationSpace: - """Returns the configuration space of the current chosen components - - Args: - dataset_properties (Optional[Dict[str, str]]): Describes the 
dataset to work on - default (Optional[str]): Default scheduler to use - include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive - list, and will exclusively use this components. - exclude: Optional[Dict[str, Any]]: which components to skip - - Returns: - ConfigurationSpace: the configuration space of the hyper-parameters of the - chosen component - """ - cs = ConfigurationSpace() - - if dataset_properties is None: - dataset_properties = {} - - dataset_properties = {**self.dataset_properties, **dataset_properties} - - # Compile a list of legal trainers for this problem - available_trainers = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) - - if len(available_trainers) == 0: - raise ValueError("No trainer found") - - if default is None: - defaults = ['StandardTrainer', - ] - for default_ in defaults: - if default_ in available_trainers: - default = default_ - break - updates = self._get_search_space_updates() - if '__choice__' in updates.keys(): - choice_hyperparameter = updates['__choice__'] - if not set(choice_hyperparameter.value_range).issubset(available_trainers): - raise ValueError("Expected given update for {} to have " - "choices in {} got {}".format(self.__class__.__name__, - available_trainers, - choice_hyperparameter.value_range)) - trainer = CategoricalHyperparameter('__choice__', - choice_hyperparameter.value_range, - default_value=choice_hyperparameter.default_value) - else: - trainer = CategoricalHyperparameter( - '__choice__', - list(available_trainers.keys()), - default_value=default - ) - cs.add_hyperparameter(trainer) - for name in trainer.choices: - updates = self._get_search_space_updates(prefix=name) - config_space = available_trainers[name].get_hyperparameter_search_space(dataset_properties, # type:ignore - **updates) - parent_hyperparameter = {'parent': trainer, 'value': name} - cs.add_configuration_space( - name, - config_space, - parent_hyperparameter=parent_hyperparameter - ) - - self.configuration_space_ = cs - self.dataset_properties_ = dataset_properties - return cs - - def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: - """The transform function calls the transform function of the - underlying model and returns the transformed array. - - Args: - X (np.ndarray): input features - - Returns: - np.ndarray: Transformed features - """ - X.update({'run_summary': self.run_summary}) - return X - - def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchComponent: - """ - Fits a component by using an input dictionary with pre-requisites - - Args: - X (X: Dict[str, Any]): Dependencies needed by current component to perform fit - y (Any): not used. To comply with sklearn API - - Returns: - A instance of self - """ - # Make sure that the prerequisites are there - self.check_requirements(X, y) - - # Setup the logger - self.logger = get_named_client_logger( - name=f"{X['num_run']}_{time.time()}", - # Log to a user provided port else to the default logging port - port=X['logger_port' - ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, - ) - - # Call the actual fit function. - self._fit( - X=X, - y=y, - **kwargs - ) - - return cast(autoPyTorchComponent, self.choice) - - def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': - """ - Fits a component by using an input dictionary with pre-requisites - - Args: - X (X: Dict[str, Any]): Dependencies needed by current component to perform fit - y (Any): not used. 
To comply with sklearn API - - Returns: - A instance of self - """ - - # Comply with mypy - # Notice that choice here stands for the component choice framework, - # where we dynamically build the configuration space by selecting the available - # component choices. In this case, is what trainer choices are available - assert self.choice is not None - - # Setup a Logger and other logging support - # Writer is not pickable -- make sure it is not saved in self - writer = None - if 'use_tensorboard_logger' in X and X['use_tensorboard_logger']: - writer = SummaryWriter(log_dir=X['backend'].temporary_directory) - - if X["torch_num_threads"] > 0: - torch.set_num_threads(X["torch_num_threads"]) - - budget_tracker = BudgetTracker( - budget_type=X['budget_type'], - max_runtime=X['runtime'] if 'runtime' in X else None, - max_epochs=X['epochs'] if 'epochs' in X else None, - ) - - # Support additional user metrics - additional_metrics = X['additional_metrics'] if 'additional_metrics' in X else None - additional_losses = X['additional_losses'] if 'additional_losses' in X else None - self.choice.prepare( - model=X['network'], - metrics=get_metrics(dataset_properties=X['dataset_properties'], - names=additional_metrics), - criterion=get_loss(X['dataset_properties'], - name=additional_losses), - budget_tracker=budget_tracker, - optimizer=X['optimizer'], - device=get_device_from_fit_dictionary(X), - metrics_during_training=X['metrics_during_training'], - scheduler=X['lr_scheduler'], - task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']], - labels=X['y_train'][X['backend'].load_datamanager().splits[X['split_id']][0]] - ) - total_parameter_count, trainable_parameter_count = self.count_parameters(X['network']) - self.run_summary = RunSummary( - total_parameter_count, - trainable_parameter_count, - ) - - epoch = 1 - - while True: - - # prepare epoch - start_time = time.time() - - self.choice.on_epoch_start(X=X, epoch=epoch) - - # training - train_loss, train_metrics = self.choice.train_epoch( - train_loader=X['train_data_loader'], - epoch=epoch, - writer=writer, - ) - - val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {} - if self.eval_valid_each_epoch(X): - val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) - if 'test_data_loader' in X and X['test_data_loader']: - test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer) - - # Save training information - self.run_summary.add_performance( - epoch=epoch, - start_time=start_time, - end_time=time.time(), - train_loss=train_loss, - val_loss=val_loss, - test_loss=test_loss, - train_metrics=train_metrics, - val_metrics=val_metrics, - test_metrics=test_metrics, - ) - - # Save the weights of the best model and, if patience - # exhausted break training - if self.early_stop_handler(X): - break - - if self.choice.on_epoch_end(X=X, epoch=epoch): - break - - self.logger.debug(self.run_summary.repr_last_epoch()) - - # Reached max epoch on next iter, don't even go there - if budget_tracker.is_max_epoch_reached(epoch + 1): - break - - epoch += 1 - - if 'cuda' in X['device']: - torch.cuda.empty_cache() - - # wrap up -- add score if not evaluating every epoch - if not self.eval_valid_each_epoch(X): - val_loss, val_metrics = self.choice.evaluate(X['val_data_loader']) - if 'test_data_loader' in X and X['val_data_loader']: - test_loss, test_metrics = self.choice.evaluate(X['test_data_loader']) - self.run_summary.add_performance( - epoch=epoch, - start_time=start_time, - end_time=time.time(), - 
train_loss=train_loss, - val_loss=val_loss, - test_loss=test_loss, - train_metrics=train_metrics, - val_metrics=val_metrics, - test_metrics=test_metrics, - ) - self.save_model_for_ensemble() - - self.logger.info(f"Finished training with {self.run_summary.repr_last_epoch()}") - - # Tag as fitted - self.fitted_ = True - - return self - - def early_stop_handler(self, X: Dict[str, Any]) -> bool: - """ - If early stopping is enabled, this procedure stops the training after a - given patience - Args: - X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing - mechanism, in which during a transform, a components adds relevant information - so that further stages can be properly fitted - - Returns: - bool: If true, training should be stopped - """ - assert self.run_summary is not None - - # Allow to disable early stopping - if X['early_stopping'] is None or X['early_stopping'] < 0: - return False - - # Store the best weights seen so far: - if self.checkpoint_dir is None: - self.checkpoint_dir = tempfile.mkdtemp(dir=X['backend'].temporary_directory) - - epochs_since_best = self.run_summary.get_last_epoch() - self.run_summary.get_best_epoch() - - # Save the checkpoint if there is a new best epoch - best_path = os.path.join(self.checkpoint_dir, 'best.pth') - if epochs_since_best == 0: - torch.save(X['network'].state_dict(), best_path) - - if epochs_since_best > X['early_stopping']: - self.logger.debug(f" Early stopped model {X['num_run']} on epoch {self.run_summary.get_best_epoch()}") - # We will stop the training. Load the last best performing weights - X['network'].load_state_dict(torch.load(best_path)) - - # Let the tempfile module clean the temp dir - self.checkpoint_dir = None - return True - - return False - - def eval_valid_each_epoch(self, X: Dict[str, Any]) -> bool: - """ - Returns true if we are supposed to evaluate the model on every epoch, - on the validation data. Usually, we only validate the data at the end, - but in the case of early stopping, is appealing to evaluate each epoch. - Args: - X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing - mechanism, in which during a transform, a components adds relevant information - so that further stages can be properly fitted - - Returns: - bool: if True, the model is evaluated in every epoch - - """ - if 'early_stopping' in X and X['early_stopping']: - return True - - # We need to know if we should reduce the rate based on val loss - if 'ReduceLROnPlateau' in X['lr_scheduler'].__class__.__name__: - return True - - return False - - def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: - """ - A mechanism in code to ensure the correctness of the fit dictionary - It recursively makes sure that the children and parent level requirements - are honored before fit. - - Args: - X (Dict[str, Any]): Dictionary with fitted parameters. 
It is a message passing - mechanism, in which during a transform, a components adds relevant information - so that further stages can be properly fitted - """ - - # make sure the parent requirements are honored - super().check_requirements(X, y) - - # We need a working dir in where to put our data - if 'backend' not in X: - raise ValueError('Need a backend to provide the working directory, ' - "yet 'backend' was not found in the fit dictionary") - - # Whether we should evaluate metrics during training or no - if 'metrics_during_training' not in X: - raise ValueError('Missing metrics_during_training in the fit dictionary') - - # Setup Components - if 'lr_scheduler' not in X: - raise ValueError("Learning rate scheduler not found in the fit dictionary!") - - if 'network' not in X: - raise ValueError("Network not found in the fit dictionary!") - - if 'optimizer' not in X: - raise ValueError("Optimizer not found in the fit dictionary!") - - # Training Components - if 'train_data_loader' not in X: - raise ValueError("train_data_loader not found in the fit dictionary!") - - if 'val_data_loader' not in X: - raise ValueError("val_data_loader not found in the fit dictionary!") - - if 'budget_type' not in X: - raise ValueError("Budget type not found in the fit dictionary!") - else: - if 'epochs' not in X or 'runtime' not in X or 'epoch_or_time' not in X: - if X['budget_type'] in ['epochs', 'epoch_or_time'] and 'epochs' not in X: - raise ValueError("Budget type is epochs but " - "no epochs was not found in the fit dictionary!") - elif X['budget_type'] in ['runtime', 'epoch_or_time'] and 'runtime' not in X: - raise ValueError("Budget type is runtime but " - "no maximum number of seconds was provided!") - else: - raise ValueError("Unsupported budget type provided: {}".format( - X['budget_type'] - )) - - if 'num_run' not in X: - raise ValueError('To fit a trainer, expected fit dictionary to have a num_run') - - for config_option in ["torch_num_threads", 'device']: - if config_option not in X: - raise ValueError("To fit a trainer, expected fit dictionary to have a {}".format( - config_option - )) - - # For early stopping, we need to know the patience - if 'early_stopping' not in X: - raise ValueError('To fit a Trainer, expected fit dictionary to have early_stopping') - - @staticmethod - def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: - """ - A method to get the total/trainable parameter count from the model - - Args: - model (torch.nn.Module): the module from which to count parameters - - Returns: - total_parameter_count: the total number of parameters of the model - trainable_parameter_count: only the parameters being optimized - """ - total_parameter_count = sum( - p.numel() for p in model.parameters()) - trainable_parameter_count = sum( - p.numel() for p in model.parameters() if p.requires_grad) - return total_parameter_count, trainable_parameter_count - - def save_model_for_ensemble(self) -> str: - raise NotImplementedError() - - def __str__(self) -> str: - """ Allow a nice understanding of what components where used """ - string = str(self.run_summary) - return string diff --git a/autoPyTorch/pipeline/image_classification.py b/autoPyTorch/pipeline/image_classification.py index bf15d738a..d013f7027 100644 --- a/autoPyTorch/pipeline/image_classification.py +++ b/autoPyTorch/pipeline/image_classification.py @@ -8,7 +8,7 @@ from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from 
autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer_choice import ( +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise import ( NormalizerChoice ) from autoPyTorch.pipeline.components.setup.augmentation.image.ImageAugmenter import ImageAugmenter diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index ef57a8569..fd607bf70 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -18,27 +18,24 @@ from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import ( TabularColumnTransformer ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import ( EncoderChoice ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ - base_feature_preprocessor_choice import FeatureProprocessorChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing import ( + FeatureProprocessorChoice +) from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing -from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import SchedulerChoice +from autoPyTorch.pipeline.components.setup.lr_scheduler import SchedulerChoice from autoPyTorch.pipeline.components.setup.network.base_network import NetworkComponent -from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone_choice import NetworkBackboneChoice -from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding_choice import NetworkEmbeddingChoice -from autoPyTorch.pipeline.components.setup.network_head.base_network_head_choice import NetworkHeadChoice -from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( - NetworkInitializerChoice -) -from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import OptimizerChoice +from autoPyTorch.pipeline.components.setup.network_backbone import NetworkBackboneChoice +from autoPyTorch.pipeline.components.setup.network_embedding import NetworkEmbeddingChoice +from autoPyTorch.pipeline.components.setup.network_head import NetworkHeadChoice +from autoPyTorch.pipeline.components.setup.network_initializer import NetworkInitializerChoice +from autoPyTorch.pipeline.components.setup.optimizer import OptimizerChoice from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import FeatureDataLoader -from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( - TrainerChoice -) +from autoPyTorch.pipeline.components.training.trainer import TrainerChoice from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py index 2650868b6..27a3ae314 100644 --- a/autoPyTorch/pipeline/tabular_regression.py +++ 
b/autoPyTorch/pipeline/tabular_regression.py @@ -16,27 +16,26 @@ from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import ( TabularColumnTransformer ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import ( EncoderChoice ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ - base_feature_preprocessor_choice import FeatureProprocessorChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing import ( + FeatureProprocessorChoice, +) from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing -from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import SchedulerChoice +from autoPyTorch.pipeline.components.setup.lr_scheduler import SchedulerChoice from autoPyTorch.pipeline.components.setup.network.base_network import NetworkComponent -from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone_choice import NetworkBackboneChoice -from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding_choice import NetworkEmbeddingChoice -from autoPyTorch.pipeline.components.setup.network_head.base_network_head_choice import NetworkHeadChoice -from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( +from autoPyTorch.pipeline.components.setup.network_backbone import NetworkBackboneChoice +from autoPyTorch.pipeline.components.setup.network_embedding import NetworkEmbeddingChoice +from autoPyTorch.pipeline.components.setup.network_head import NetworkHeadChoice +from autoPyTorch.pipeline.components.setup.network_initializer import ( NetworkInitializerChoice ) -from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import OptimizerChoice +from autoPyTorch.pipeline.components.setup.optimizer import OptimizerChoice from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import FeatureDataLoader -from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( - TrainerChoice -) +from autoPyTorch.pipeline.components.training.trainer import TrainerChoice from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates diff --git a/autoPyTorch/pipeline/traditional_tabular_classification.py b/autoPyTorch/pipeline/traditional_tabular_classification.py index 49be2a1fa..5b0471e87 100644 --- a/autoPyTorch/pipeline/traditional_tabular_classification.py +++ b/autoPyTorch/pipeline/traditional_tabular_classification.py @@ -9,7 +9,7 @@ from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice -from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice +from autoPyTorch.pipeline.components.setup.traditional_ml import ModelChoice from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates diff --git 
a/autoPyTorch/utils/parallel.py b/autoPyTorch/utils/parallel.py new file mode 100644 index 000000000..0b2f9f390 --- /dev/null +++ b/autoPyTorch/utils/parallel.py @@ -0,0 +1,41 @@ +import multiprocessing +import sys + + +def preload_modules(context: multiprocessing.context.BaseContext) -> None: + """ + This function is meant to be used with the forkserver multiprocessing context. + More details about it can be found here: + https://docs.python.org/3/library/multiprocessing.html + + Forkserver is known to be slower than other contexts. We use it because it helps + reduce the probability of a deadlock. To keep it fast, we pre-load modules so that + the forked children already have the required modules available. + + We deliberately do not preload deadlock-prone modules such as logging. + + Args: + context (multiprocessing.context.BaseContext): One of the three supported multiprocessing + contexts: fork, forkserver or spawn. + """ + all_loaded_modules = sys.modules.keys() + preload = [ + loaded_module for loaded_module in all_loaded_modules + if loaded_module.split('.')[0] in ( + 'smac', + 'autoPyTorch', + 'numpy', + 'scipy', + 'pandas', + 'pynisher', + 'sklearn', + 'ConfigSpace', + 'torch', + 'torchvision', + 'tensorboard', + 'imgaug', + 'catboost', + 'lightgbm', + ) and 'logging' not in loaded_module + ] + context.set_forkserver_preload(preload) diff --git a/autoPyTorch/utils/single_thread_client.py b/autoPyTorch/utils/single_thread_client.py new file mode 100644 index 000000000..3d8455800 --- /dev/null +++ b/autoPyTorch/utils/single_thread_client.py @@ -0,0 +1,93 @@ +import typing +from pathlib import Path + +import dask.distributed + + +class DummyFuture(dask.distributed.Future): + """ + A class that mimics a distributed Future, the outcome of + performing submit on a distributed client. + """ + def __init__(self, result: typing.Any) -> None: + self._result = result # type: typing.Any + + def result(self, timeout: typing.Optional[int] = None) -> typing.Any: + return self._result + + def cancel(self) -> None: + pass + + def done(self) -> bool: + return True + + def __repr__(self) -> str: + return "DummyFuture: {}".format(self._result) + + def __del__(self) -> None: + pass + + +class SingleThreadedClient(dask.distributed.Client): + """ + A class that mocks the dask distributed Client class. + + Using dask requires a scheduler that submits jobs in a separate process. On top of + that, pynisher runs each job in yet another process. + + When using a single core, we prefer to stay in the main process and avoid this + multiprocessing overhead (that is, the LocalCluster that a regular + dask.distributed.Client would spawn). This class therefore extends the Client() class + with the capability to run a future in the same thread (without any deadlock).
+ """ + def __init__(self) -> None: + + # Raise a not implemented error if using a method from Client + implemented_methods = ['submit', 'close', 'shutdown', 'write_scheduler_file', + '_get_scheduler_info', 'nthreads'] + method_list = [func for func in dir(dask.distributed.Client) if callable( + getattr(dask.distributed.Client, func)) and not func.startswith('__')] + for method in method_list: + if method in implemented_methods: + continue + setattr(self, method, self._unsupported_method) + pass + + def _unsupported_method(self) -> None: + raise NotImplementedError() + + def submit( + self, + func: typing.Callable, + *args: typing.List, + priority: int = 0, + **kwargs: typing.Any, + ) -> typing.Any: + return DummyFuture(func(*args, **kwargs)) + + def close(self) -> None: + pass + + def shutdown(self) -> None: + pass + + def write_scheduler_file(self, scheduler_file: str) -> None: + Path(scheduler_file).touch() + return + + def _get_scheduler_info(self) -> typing.Dict: + return { + 'workers': ['127.0.0.1'], + 'type': 'Scheduler', + } + + def nthreads(self) -> typing.Dict: + return { + '127.0.0.1': 1, + } + + def __repr__(self) -> str: + return 'SingleThreadedClient()' + + def __del__(self) -> None: + pass diff --git a/examples/20_basics/example_tabular_classification.py b/examples/20_basics/example_tabular_classification.py index c0251fa90..7b1aa9995 100644 --- a/examples/20_basics/example_tabular_classification.py +++ b/examples/20_basics/example_tabular_classification.py @@ -24,50 +24,48 @@ from autoPyTorch.api.tabular_classification import TabularClassificationTask -if __name__ == '__main__': +############################################################################ +# Data Loading +# ============ +X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, +) - ############################################################################ - # Data Loading - # ============ - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=42, - ) +############################################################################ +# Build and fit a classifier +# ========================== +api = TabularClassificationTask( + # To maintain logs of the run, you can uncomment the + # Following lines + # temporary_directory='./tmp/autoPyTorch_example_tmp_01', + # output_directory='./tmp/autoPyTorch_example_out_01', + # delete_tmp_folder_after_terminate=False, + # delete_output_folder_after_terminate=False, + seed=42, +) - ############################################################################ - # Build and fit a classifier - # ========================== - api = TabularClassificationTask( - # To maintain logs of the run, you can uncomment the - # Following lines - # temporary_directory='./tmp/autoPyTorch_example_tmp_01', - # output_directory='./tmp/autoPyTorch_example_out_01', - # delete_tmp_folder_after_terminate=False, - # delete_output_folder_after_terminate=False, - seed=42, - ) +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=300, + 
func_eval_time_limit_secs=50 +) - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=300, - func_eval_time_limit_secs=50 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - print(api.run_history, api.trajectory) - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) +############################################################################ +# Print the final ensemble performance +# ==================================== +print(api.run_history, api.trajectory) +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) diff --git a/examples/20_basics/example_tabular_regression.py b/examples/20_basics/example_tabular_regression.py index ef8cacb37..836d4d6d6 100644 --- a/examples/20_basics/example_tabular_regression.py +++ b/examples/20_basics/example_tabular_regression.py @@ -24,56 +24,44 @@ from autoPyTorch.api.tabular_regression import TabularRegressionTask -if __name__ == '__main__': +############################################################################ +# Data Loading +# ============ +X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, +) - ############################################################################ - # Data Loading - # ============ - X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - ) +############################################################################ +# Build and fit a regressor +# ========================== +api = TabularRegressionTask() - # Scale the regression targets to have zero mean and unit variance. - # This is important for Neural Networks since predicting large target values would require very large weights. 
- # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean - y_train_mean = y_train.mean() - y_train_std = y_train.std() +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='r2', + total_walltime_limit=300, + func_eval_time_limit_secs=50, + enable_traditional_pipeline=False, +) - y_train_scaled = (y_train - y_train_mean) / y_train_std - y_test_scaled = (y_test - y_train_mean) / y_train_std +############################################################################ +# Print the final ensemble performance +# ==================================== +print(api.run_history, api.trajectory) +y_pred = api.predict(X_test) - ############################################################################ - # Build and fit a regressor - # ========================== - api = TabularRegressionTask() +# Rescale the Neural Network predictions into the original target range +score = api.score(y_pred, y_test) - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train_scaled, - X_test=X_test.copy(), - y_test=y_test_scaled.copy(), - optimize_metric='r2', - total_walltime_limit=300, - func_eval_time_limit_secs=50, - enable_traditional_pipeline=False, - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - print(api.run_history, api.trajectory) - y_pred_scaled = api.predict(X_test) - - # Rescale the Neural Network predictions into the original target range - y_pred = y_pred_scaled * y_train_std + y_train_mean - score = api.score(y_pred, y_test) - - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) diff --git a/examples/40_advanced/README.txt b/examples/40_advanced/README.txt index f3293bf16..fb68a2c31 100644 --- a/examples/40_advanced/README.txt +++ b/examples/40_advanced/README.txt @@ -1,11 +1,12 @@ .. _examples_tabular_basics: -============================== +================================= Advanced Tabular Dataset Examples -============================== +================================= Advanced examples for using *Auto-PyTorch* on tabular datasets. We explain 1. How to customise the search space 2. How to split the data according to different resampling strategies + 3. How to visualize the results of Auto-PyTorch diff --git a/examples/40_advanced/example_custom_configuration_space.py b/examples/40_advanced/example_custom_configuration_space.py index 6a3764b94..bd02e51f1 100644 --- a/examples/40_advanced/example_custom_configuration_space.py +++ b/examples/40_advanced/example_custom_configuration_space.py @@ -72,7 +72,11 @@ def get_search_space_updates(): ############################################################################ # Build and fit a classifier with include components # ================================================== + # AutoPyTorch can search for multiple configurations at the same time + # if multiple cores are allocated, using the n_jobs argument. 
By default, + # Only 1 core is used while searching for configurations. api = TabularClassificationTask( + n_jobs=2, search_space_updates=get_search_space_updates(), include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], 'encoder': ['OneHotEncoder']} diff --git a/examples/40_advanced/example_resampling_strategy.py b/examples/40_advanced/example_resampling_strategy.py index 270f518c8..9fb77b76d 100644 --- a/examples/40_advanced/example_resampling_strategy.py +++ b/examples/40_advanced/example_resampling_strategy.py @@ -27,118 +27,116 @@ from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes -if __name__ == '__main__': - - ############################################################################ - # Data Loading - # ============ - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - ) - - ############################################################################ - # Build and fit a classifier with default resampling strategy - # =========================================================== - api = TabularClassificationTask( - # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33 - # is the default argument setting for TabularClassificationTask. - # It is explicitly specified in this example for demonstrational - # purpose. - resampling_strategy=HoldoutValTypes.holdout_validation, - resampling_strategy_args={'val_share': 0.33} - ) - - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=30 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - print(api.run_history, api.trajectory) - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) - - ############################################################################ - - ############################################################################ - # Build and fit a classifier with Cross validation resampling strategy - # ==================================================================== - api = TabularClassificationTask( - resampling_strategy=CrossValTypes.k_fold_cross_validation, - resampling_strategy_args={'num_splits': 3} - ) - - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=30 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - print(api.run_history, api.trajectory) - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) - - 
############################################################################ - - ############################################################################ - # Build and fit a classifier with Stratified resampling strategy - # ============================================================== - api = TabularClassificationTask( - # For demonstration purposes, we use - # Stratified hold out validation. However, - # one can also use CrossValTypes.stratified_k_fold_cross_validation. - resampling_strategy=HoldoutValTypes.stratified_holdout_validation, - resampling_strategy_args={'val_share': 0.33} - ) - - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=30 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - print(api.run_history, api.trajectory) - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) +############################################################################ +# Data Loading +# ============ +X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, +) + +############################################################################ +# Build and fit a classifier with default resampling strategy +# =========================================================== +api = TabularClassificationTask( + # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33 + # is the default argument setting for TabularClassificationTask. + # It is explicitly specified in this example for demonstrational + # purpose. 
+ resampling_strategy=HoldoutValTypes.holdout_validation, + resampling_strategy_args={'val_share': 0.33} +) + +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=150, + func_eval_time_limit_secs=30 +) + +############################################################################ +# Print the final ensemble performance +# ==================================== +print(api.run_history, api.trajectory) +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) + +############################################################################ + +############################################################################ +# Build and fit a classifier with Cross validation resampling strategy +# ==================================================================== +api = TabularClassificationTask( + resampling_strategy=CrossValTypes.k_fold_cross_validation, + resampling_strategy_args={'num_splits': 3} +) + +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=150, + func_eval_time_limit_secs=30 +) + +############################################################################ +# Print the final ensemble performance +# ==================================== +print(api.run_history, api.trajectory) +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) + +############################################################################ + +############################################################################ +# Build and fit a classifier with Stratified resampling strategy +# ============================================================== +api = TabularClassificationTask( + # For demonstration purposes, we use + # Stratified hold out validation. However, + # one can also use CrossValTypes.stratified_k_fold_cross_validation. 
+ resampling_strategy=HoldoutValTypes.stratified_holdout_validation, + resampling_strategy_args={'val_share': 0.33} +) + +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=150, + func_eval_time_limit_secs=30 +) + +############################################################################ +# Print the final ensemble performance +# ==================================== +print(api.run_history, api.trajectory) +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) diff --git a/examples/40_advanced/example_visualization.py b/examples/40_advanced/example_visualization.py index 011ea9b78..107d07a47 100644 --- a/examples/40_advanced/example_visualization.py +++ b/examples/40_advanced/example_visualization.py @@ -50,119 +50,117 @@ from autoPyTorch.metrics import accuracy -if __name__ == '__main__': - - ############################################################################ - # Data Loading - # ============ - - # We will use the iris dataset for this Toy example - seed = 42 - X, y = sklearn.datasets.fetch_openml(data_id=61, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=42, - ) - - ############################################################################ - # Build and fit a classifier - # ========================== - api = TabularClassificationTask(seed=seed) - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric=accuracy.name, - total_walltime_limit=200, - func_eval_time_limit_secs=50 - ) - - ############################################################################ - # One can also save the model for future inference - # ================================================ - - # For more details on how to deploy a model, please check - # `Scikit-Learn persistence - # `_ support. - with open('estimator.pickle', 'wb') as handle: - pickle.dump(api, handle, protocol=pickle.HIGHEST_PROTOCOL) - - # Then let us read it back and use it for our analysis - with open('estimator.pickle', 'rb') as handle: - estimator = pickle.load(handle) - - ############################################################################ - # Plotting the model performance - # ============================== - - # We will plot the search incumbent through time. 
- - # Collect the performance of individual machine learning algorithms - # found by SMAC - individual_performances = [] - for run_key, run_value in estimator.run_history.data.items(): - if run_value.status != StatusType.SUCCESS: - # Ignore crashed runs - continue - individual_performances.append({ - 'Timestamp': pd.Timestamp( - time.strftime( - '%Y-%m-%d %H:%M:%S', - time.localtime(run_value.endtime) - ) - ), - 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost, - 'single_best_test_accuracy': np.nan if run_value.additional_info is None else - accuracy._optimum - run_value.additional_info['test_loss'], - }) - individual_performance_frame = pd.DataFrame(individual_performances) - - # Collect the performance of the ensemble through time - # This ensemble is built from the machine learning algorithms - # found by SMAC - ensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history) - - # As we are tracking the incumbent, we are interested in the cummax() performance - ensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[ - 'train_accuracy' - ].cummax() - ensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[ - 'test_accuracy' - ].cummax() - ensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True) - individual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[ - 'single_best_optimization_accuracy' - ].cummax() - individual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[ - 'single_best_test_accuracy' - ].cummax() - - pd.merge( - ensemble_performance_frame, - individual_performance_frame, - on="Timestamp", how='outer' - ).sort_values('Timestamp').fillna(method='ffill').plot( - x='Timestamp', - kind='line', - legend=True, - title='Auto-PyTorch accuracy over time', - grid=True, - ) - plt.show() - - # We then can understand the importance of each input feature using - # a permutation importance analysis. This is done as a proof of concept, to - # showcase that we can leverage of scikit-learn API. - result = permutation_importance(estimator, X_train, y_train, n_repeats=5, - scoring='accuracy', - random_state=seed) - sorted_idx = result.importances_mean.argsort() - - fig, ax = plt.subplots() - ax.boxplot(result.importances[sorted_idx].T, - vert=False, labels=X_test.columns[sorted_idx]) - ax.set_title("Permutation Importances (Train set)") - fig.tight_layout() - plt.show() +############################################################################ +# Data Loading +# ============ + +# We will use the iris dataset for this Toy example +seed = 42 +X, y = sklearn.datasets.fetch_openml(data_id=61, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=42, +) + +############################################################################ +# Build and fit a classifier +# ========================== +api = TabularClassificationTask(seed=seed) +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric=accuracy.name, + total_walltime_limit=200, + func_eval_time_limit_secs=50 +) + +############################################################################ +# One can also save the model for future inference +# ================================================ + +# For more details on how to deploy a model, please check +# `Scikit-Learn persistence +# `_ support. 
+with open('estimator.pickle', 'wb') as handle: + pickle.dump(api, handle, protocol=pickle.HIGHEST_PROTOCOL) + +# Then let us read it back and use it for our analysis +with open('estimator.pickle', 'rb') as handle: + estimator = pickle.load(handle) + +############################################################################ +# Plotting the model performance +# ============================== + +# We will plot the search incumbent through time. + +# Collect the performance of individual machine learning algorithms +# found by SMAC +individual_performances = [] +for run_key, run_value in estimator.run_history.data.items(): + if run_value.status != StatusType.SUCCESS: + # Ignore crashed runs + continue + individual_performances.append({ + 'Timestamp': pd.Timestamp( + time.strftime( + '%Y-%m-%d %H:%M:%S', + time.localtime(run_value.endtime) + ) + ), + 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost, + 'single_best_test_accuracy': np.nan if run_value.additional_info is None else + accuracy._optimum - run_value.additional_info['test_loss'], + }) +individual_performance_frame = pd.DataFrame(individual_performances) + +# Collect the performance of the ensemble through time +# This ensemble is built from the machine learning algorithms +# found by SMAC +ensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history) + +# As we are tracking the incumbent, we are interested in the cummax() performance +ensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[ + 'train_accuracy' +].cummax() +ensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[ + 'test_accuracy' +].cummax() +ensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True) +individual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[ + 'single_best_optimization_accuracy' +].cummax() +individual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[ + 'single_best_test_accuracy' +].cummax() + +pd.merge( + ensemble_performance_frame, + individual_performance_frame, + on="Timestamp", how='outer' +).sort_values('Timestamp').fillna(method='ffill').plot( + x='Timestamp', + kind='line', + legend=True, + title='Auto-PyTorch accuracy over time', + grid=True, +) +plt.show() + +# We then can understand the importance of each input feature using +# a permutation importance analysis. This is done as a proof of concept, to +# showcase that we can leverage of scikit-learn API. 
+result = permutation_importance(estimator, X_train, y_train, n_repeats=5, + scoring='accuracy', + random_state=seed) +sorted_idx = result.importances_mean.argsort() + +fig, ax = plt.subplots() +ax.boxplot(result.importances[sorted_idx].T, + vert=False, labels=X_test.columns[sorted_idx]) +ax.set_title("Permutation Importances (Train set)") +fig.tight_layout() +plt.show() diff --git a/test/test_ensemble/test_ensemble.py b/test/test_ensemble/test_ensemble.py index cd0f02e72..913fda548 100644 --- a/test/test_ensemble/test_ensemble.py +++ b/test/test_ensemble/test_ensemble.py @@ -525,7 +525,7 @@ def test_run_end_at(ensemble_backend): current_time = time.time() - ensbuilder.run(end_at=current_time + 10, iteration=1) + ensbuilder.run(end_at=current_time + 10, iteration=1, pynisher_context='forkserver') # 4 seconds left because: 10 seconds - 5 seconds overhead - very little overhead, # but then rounded to an integer assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"], 4 @@ -718,9 +718,10 @@ def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_ ensemble_memory_limit=1000, random_state=0, max_iterations=None, + pynisher_context='fork', ) - manager.build_ensemble(dask_client, unit_test=True, pynisher_context='fork') + manager.build_ensemble(dask_client, unit_test=True) future = manager.futures[0] dask.distributed.wait([future]) # wait for the ensemble process to finish assert future.result() == ([], 5, None, None), vars(future.result()) diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py index 015b78dca..9afa8969f 100644 --- a/test/test_evaluation/test_evaluation.py +++ b/test/test_evaluation/test_evaluation.py @@ -353,6 +353,7 @@ def test_exception_in_target_function(self, eval_holdout_mock): self.assertIn('traceback', info[1].additional_info) self.assertNotIn('exitcode', info[1].additional_info) + @unittest.skipIf(sys.version_info < (3, 7), reason="requires python3.7 or higher") def test_silent_exception_in_target_function(self): config = unittest.mock.Mock(spec=int) config.config_id = 198 @@ -380,6 +381,7 @@ def test_silent_exception_in_target_function(self): """'save_targets_ensemble'",)""", """AttributeError("'BackendMock' object has no attribute """ """'save_targets_ensemble'")""", + """AttributeError('save_targets_ensemble')""" """AttributeError("'BackendMock' object has no attribute """ """'setup_logger'",)""", """AttributeError("'BackendMock' object has no attribute """ diff --git a/test/test_pipeline/components/preprocessing/base.py b/test/test_pipeline/components/preprocessing/base.py index 875ed399c..d9f5170c5 100644 --- a/test/test_pipeline/components/preprocessing/base.py +++ b/test/test_pipeline/components/preprocessing/base.py @@ -3,10 +3,9 @@ from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import \ TabularColumnTransformer -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import \ - EncoderChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import EncoderChoice from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import 
ScalerChoice from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline diff --git a/test/test_pipeline/components/preprocessing/test_encoder_choice.py b/test/test_pipeline/components/preprocessing/test_encoder_choice.py index f4dbcc119..860fd8eac 100644 --- a/test/test_pipeline/components/preprocessing/test_encoder_choice.py +++ b/test/test_pipeline/components/preprocessing/test_encoder_choice.py @@ -1,7 +1,7 @@ import copy import unittest -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import ( EncoderChoice ) diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py index 822112fca..99fad6b1f 100644 --- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py +++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py @@ -7,10 +7,11 @@ from sklearn.base import BaseEstimator from sklearn.compose import make_column_transformer +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing import ( + FeatureProprocessorChoice +) from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ NoFeaturePreprocessor import NoFeaturePreprocessor -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ - base_feature_preprocessor_choice import FeatureProprocessorChoice from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor_choice.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor_choice.py index 52d55c6df..57841aef0 100644 --- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor_choice.py +++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor_choice.py @@ -1,8 +1,9 @@ import copy import unittest -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. 
\ - base_feature_preprocessor_choice import FeatureProprocessorChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing import ( + FeatureProprocessorChoice +) class TestFeaturePreprocessorChoice(unittest.TestCase): diff --git a/test/test_pipeline/components/preprocessing/test_normalizer_choice.py b/test/test_pipeline/components/preprocessing/test_normalizer_choice.py index dbb711ab0..42b79d72f 100644 --- a/test/test_pipeline/components/preprocessing/test_normalizer_choice.py +++ b/test/test_pipeline/components/preprocessing/test_normalizer_choice.py @@ -1,7 +1,7 @@ import copy import unittest -from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer_choice import ( +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise import ( NormalizerChoice ) diff --git a/test/test_pipeline/components/preprocessing/test_scaler_choice.py b/test/test_pipeline/components/preprocessing/test_scaler_choice.py index 9d10af59f..3e4b6a3e5 100644 --- a/test/test_pipeline/components/preprocessing/test_scaler_choice.py +++ b/test/test_pipeline/components/preprocessing/test_scaler_choice.py @@ -1,7 +1,7 @@ import copy import unittest -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice class TestRescalerChoice(unittest.TestCase): diff --git a/test/test_pipeline/components/setup/test_setup.py b/test/test_pipeline/components/setup/test_setup.py index aae1c8ff4..9d9b6f7ad 100644 --- a/test/test_pipeline/components/setup/test_setup.py +++ b/test/test_pipeline/components/setup/test_setup.py @@ -10,27 +10,26 @@ import torch from torch import nn -import autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice as lr_components -import \ - autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice as network_initializer_components # noqa: E501 -import autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice as optimizer_components +import autoPyTorch.pipeline.components.setup.lr_scheduler as lr_components +import autoPyTorch.pipeline.components.setup.network_backbone as base_network_backbone_choice +import autoPyTorch.pipeline.components.setup.network_head as base_network_head_choice +import autoPyTorch.pipeline.components.setup.network_initializer as network_initializer_components # noqa: E501 +import autoPyTorch.pipeline.components.setup.optimizer as optimizer_components from autoPyTorch import constants from autoPyTorch.pipeline.components.base_component import ThirdPartyComponents -from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import ( +from autoPyTorch.pipeline.components.setup.lr_scheduler import ( BaseLRComponent, SchedulerChoice ) -from autoPyTorch.pipeline.components.setup.network_backbone import base_network_backbone_choice +from autoPyTorch.pipeline.components.setup.network_backbone import NetworkBackboneChoice from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone import NetworkBackboneComponent -from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone_choice import NetworkBackboneChoice -from autoPyTorch.pipeline.components.setup.network_head import base_network_head_choice +from autoPyTorch.pipeline.components.setup.network_head import NetworkHeadChoice from 
autoPyTorch.pipeline.components.setup.network_head.base_network_head import NetworkHeadComponent -from autoPyTorch.pipeline.components.setup.network_head.base_network_head_choice import NetworkHeadChoice -from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( +from autoPyTorch.pipeline.components.setup.network_initializer import ( BaseNetworkInitializerComponent, NetworkInitializerChoice ) -from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import ( +from autoPyTorch.pipeline.components.setup.optimizer import ( BaseOptimizerComponent, OptimizerChoice ) diff --git a/test/test_pipeline/components/setup/test_setup_traditional_classification.py b/test/test_pipeline/components/setup/test_setup_traditional_classification.py index ea3100724..90c7f18f6 100644 --- a/test/test_pipeline/components/setup/test_setup_traditional_classification.py +++ b/test/test_pipeline/components/setup/test_setup_traditional_classification.py @@ -6,7 +6,7 @@ import pytest -from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice +from autoPyTorch.pipeline.components.setup.traditional_ml import ModelChoice from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.classifiers import ( CatboostModel, ExtraTreesModel, diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 38b6b5007..98ab27b31 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -23,8 +23,8 @@ def prepare_trainer(self, trainer: BaseTrainerComponent, task_type: int, epochs=50): + # make this test reproducible torch.manual_seed(1) - if task_type in CLASSIFICATION_TASKS: X, y = make_classification( n_samples=n_samples, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index 36670e325..c55bd967c 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -16,6 +16,9 @@ from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import ( BaseDataLoaderComponent, ) +from autoPyTorch.pipeline.components.training.trainer import ( + TrainerChoice, +) from autoPyTorch.pipeline.components.training.trainer.MixUpTrainer import ( MixUpTrainer ) @@ -24,9 +27,6 @@ ) from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( BaseTrainerComponent, ) -from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( - TrainerChoice, -) sys.path.append(os.path.dirname(__file__)) from test.test_pipeline.components.training.base import BaseTraining # noqa (E402: module level import not at top of file) diff --git a/test/test_utils/test_single_thread_client.py b/test/test_utils/test_single_thread_client.py new file mode 100644 index 000000000..2338d4cf7 --- /dev/null +++ b/test/test_utils/test_single_thread_client.py @@ -0,0 +1,32 @@ +import dask.distributed + +from distributed.utils_test import inc + +import pytest + +from autoPyTorch.utils.single_thread_client import SingleThreadedClient + + +def test_single_thread_client_like_dask_client(): + single_thread_client = SingleThreadedClient() + assert isinstance(single_thread_client, dask.distributed.Client) + future = single_thread_client.submit(inc, 1) + assert isinstance(future, dask.distributed.Future) + assert future.done() + assert future.result() == 2 + assert 
sum(single_thread_client.nthreads().values()) == 1 + single_thread_client.close() + single_thread_client.shutdown() + + # Clients/Futures get printed, so make sure str works + # str calls __repr__, which is the purpose of the check below + assert str(future) != "" + assert str(single_thread_client) != "" + + # SingleThreadedClient is an inherited version of the dask client, + # so that futures run in the same thread as the main job. + # We carefully selected which methods are inherited, and any other + # method should raise a NotImplementedError to be safe against major + # dask client API changes. + with pytest.raises(NotImplementedError): + single_thread_client.get_scheduler_logs()
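
As a reading aid for the new utilities introduced above, here is a minimal, standalone sketch (not part of the patch) of how the preload_modules helper from autoPyTorch/utils/parallel.py is meant to be combined with a forkserver context. The worker function _dummy_job and the queue-based hand-off are assumptions made purely for illustration.

import multiprocessing

from autoPyTorch.utils.parallel import preload_modules


def _dummy_job(q):
    # In a forkserver child, the preloaded modules (numpy, torch, sklearn, ...)
    # should already be imported by the fork server, so startup stays cheap.
    q.put('done')


if __name__ == '__main__':
    # Build the forkserver context and hint which heavy modules the fork
    # server should import once before it starts forking children.
    context = multiprocessing.get_context('forkserver')
    preload_modules(context)

    q = context.Queue()
    worker = context.Process(target=_dummy_job, args=(q,))
    worker.start()
    print(q.get())  # prints 'done'
    worker.join()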
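
Along the same lines, a hypothetical usage sketch of the SingleThreadedClient from autoPyTorch/utils/single_thread_client.py, intended for single-core runs where the overhead of a real dask scheduler is unnecessary; the expensive_evaluation function is invented for this example.

from autoPyTorch.utils.single_thread_client import SingleThreadedClient


def expensive_evaluation(x):
    # Stand-in for a real target function, e.g. fitting one pipeline
    return x * x


client = SingleThreadedClient()
# submit() executes the callable immediately in the calling thread and
# wraps the return value in a DummyFuture, so no scheduler process is used.
future = client.submit(expensive_evaluation, 4)
assert future.done() and future.result() == 16
client.close()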