diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 3f398c464..bacf905e7 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -29,7 +29,7 @@ jobs:
     - name: Run tests
       run: |
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi
-        python -m pytest -n 2 --timeout=600 --timeout-method=thread --dist load test -sv $codecov
+        python -m pytest --durations=20 --timeout=300 --timeout-method=thread -v $codecov test
     - name: Check for files left behind by test
       if: ${{ always() }}
       run: |
diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py
index 565ffd4f3..51a7a8e38 100644
--- a/autoPyTorch/datasets/base_dataset.py
+++ b/autoPyTorch/datasets/base_dataset.py
@@ -48,6 +48,7 @@ class TransformSubset(Subset):
     We achieve so by adding a train flag to the pytorch subset
     """
+
     def __init__(self, dataset: Dataset, indices: Sequence[int], train: bool) -> None:
        self.dataset = dataset
        self.indices = indices
@@ -371,3 +372,11 @@ def get_dataset_properties(self, dataset_requirements: List[FitRequirement]) ->
             'num_classes': self.num_classes,
         })
         return dataset_properties
+
+    def get_required_dataset_info(self) -> Dict[str, Any]:
+        """
+        Returns a dictionary containing the dataset properties required to instantiate a pipeline.
+        """
+        info = {'output_type': self.output_type,
+                'issparse': self.issparse}
+        return info
diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py
index ab75ce3f8..dbaa3a260 100644
--- a/autoPyTorch/datasets/tabular_dataset.py
+++ b/autoPyTorch/datasets/tabular_dataset.py
@@ -104,7 +104,6 @@ def __init__(self, X: Union[np.ndarray, pd.DataFrame],
         # rather to have a performance through time on the test data
         if X_test is not None:
             X_test, self._test_data_types, _, _, _ = self.interpret_columns(X_test)
-
             # Some quality checks on the data
             if self.data_types != self._test_data_types:
                 raise ValueError(f"The train data inferred types {self.data_types} are "
@@ -205,8 +204,7 @@ def interpret_columns(self,

         return data, data_types, nan_mask, itovs, vtois

-    def infer_dataset_properties(self, X: Any) \
-            -> Tuple[List[int], List[int], List[object], int]:
+    def infer_dataset_properties(self, X: Any) -> Tuple[List[int], List[int], List[object], int]:
         """
         Infers the properties of the dataset like
         categorical_columns, numerical_columns, categories, num_features
@@ -225,5 +223,17 @@
                 numerical_columns.append(i)
         categories = [np.unique(X.iloc[:, a]).tolist() for a in categorical_columns]
         num_features = X.shape[1]
-        return categorical_columns, numerical_columns, categories, num_features
+        return categorical_columns, numerical_columns, categories, num_features
+
+    def get_required_dataset_info(self) -> Dict[str, Any]:
+        """
+        Returns a dictionary containing the dataset properties required to instantiate a pipeline.
+        """
+        info = super().get_required_dataset_info()
+        info.update({
+            'numerical_columns': self.numerical_columns,
+            'categorical_columns': self.categorical_columns,
+            'task_type': self.task_type
+        })
+        return info
diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py
index 65d252852..aeb89464d 100644
--- a/autoPyTorch/evaluation/abstract_evaluator.py
+++ b/autoPyTorch/evaluation/abstract_evaluator.py
@@ -32,8 +32,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.evaluation.utils import (
-    convert_multioutput_multiclass_to_multilabel,
-    subsampler
+    convert_multioutput_multiclass_to_multilabel
 )
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
@@ -42,6 +41,7 @@
     get_metrics,
 )
 from autoPyTorch.utils.backend import Backend
+from autoPyTorch.utils.common import subsampler
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
 from autoPyTorch.utils.pipeline import get_dataset_requirements
diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py
index 16b05a30e..3d3887ee5 100644
--- a/autoPyTorch/evaluation/train_evaluator.py
+++ b/autoPyTorch/evaluation/train_evaluator.py
@@ -17,9 +17,9 @@
     AbstractEvaluator,
     fit_and_suppress_warnings
 )
-from autoPyTorch.evaluation.utils import subsampler
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.utils.backend import Backend
+from autoPyTorch.utils.common import subsampler
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

 __all__ = ['TrainEvaluator', 'eval_function']
diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py
index d783413ca..f7cefd100 100644
--- a/autoPyTorch/evaluation/utils.py
+++ b/autoPyTorch/evaluation/utils.py
@@ -4,8 +4,6 @@

 import numpy as np

-import pandas as pd
-
 from smac.runhistory.runhistory import RunValue

 __all__ = [
@@ -16,12 +14,6 @@
 ]


-def subsampler(data: Union[np.ndarray, pd.DataFrame],
-               x: Union[np.ndarray, List[int]]
-               ) -> Union[np.ndarray, pd.DataFrame]:
-    return data[x] if isinstance(data, np.ndarray) else data.iloc[x]
-
-
 def read_queue(queue_: Queue) -> List[RunValue]:
     stack: List[RunValue] = []
     while True:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
index e77c65be2..24491af44 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
@@ -11,7 +11,7 @@
     autoPyTorchTabularPreprocessingComponent
 )
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
-from autoPyTorch.utils.common import FitRequirement
+from autoPyTorch.utils.common import FitRequirement, subsampler


 class TabularColumnTransformer(autoPyTorchTabularPreprocessingComponent):
@@ -48,7 +48,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer":
             "TabularColumnTransformer": an instance of self
         """
         self.check_requirements(X, y)
-
         numerical_pipeline = 'drop'
         categorical_pipeline = 'drop'

@@ -67,11 +66,11 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer":
         # Where to get the data -- Prioritize X_train if any else
         # get from backend
         if 'X_train' in X:
-            X_train = X['X_train']
+            X_train = subsampler(X['X_train'], X['train_indices'])
         else:
             X_train = X['backend'].load_datamanager().train_tensors[0]

-        self.preprocessor.fit(X_train)
+        self.preprocessor.fit(X_train)
         return self

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
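The `subsampler` helper that moved from `evaluation/utils.py` into `utils/common.py` (see the `autoPyTorch/utils/common.py` hunk further below) is what `TabularColumnTransformer.fit` now uses to fit the preprocessor on the training split only. A minimal sketch of its behaviour, with made-up toy data:

# Illustrative only: subsampler picks rows positionally, using plain
# indexing for ndarrays/sparse matrices and .iloc for DataFrames.
import numpy as np
import pandas as pd

from autoPyTorch.utils.common import subsampler

X_np = np.arange(12).reshape(4, 3)
X_df = pd.DataFrame(X_np)
train_indices = [0, 2]

assert subsampler(X_np, train_indices).shape == (2, 3)
assert subsampler(X_df, train_indices).shape == (2, 3)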
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py
index 019861c92..7be7c94a2 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py
@@ -78,6 +78,9 @@ def get_hyperparameter_search_space(self,
         # add only no encoder to choice hyperparameters in case the dataset is only numerical
         if len(dataset_properties['categorical_columns']) == 0:
             default = 'NoEncoder'
+            if include is not None and default not in include:
+                raise ValueError("Provided {} in include, however, the dataset "
+                                 "is incompatible with it".format(include))
             preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                          ['NoEncoder'],
                                                          default_value=default)
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
new file mode 100644
index 000000000..4fb77b90f
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py
@@ -0,0 +1,104 @@
+from math import ceil, floor
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.conditions import EqualsCondition, InCondition
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
+    UniformFloatHyperparameter,
+    UniformIntegerHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.decomposition
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+from autoPyTorch.utils.common import FitRequirement
+
+
+class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, n_components: int = 10,
+                 kernel: str = 'rbf', degree: int = 3,
+                 gamma: float = 0.01, coef0: float = 0.0,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 ) -> None:
+        self.n_components = n_components
+        self.kernel = kernel
+        self.degree = degree
+        self.gamma = gamma
+        self.coef0 = coef0
+        self.random_state = random_state
+        super().__init__()
+
+        self.add_fit_requirements([
+            FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+
+        self.preprocessor['numerical'] = sklearn.decomposition.KernelPCA(
+            n_components=self.n_components, kernel=self.kernel,
+            degree=self.degree, gamma=self.gamma, coef0=self.coef0,
+            remove_zero_eig=True, random_state=self.random_state)
+
+        return self
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        n_components: Tuple[Tuple, float] = ((0.5, 0.9), 0.5),
+        kernel: Tuple[Tuple, str] = (('poly', 'rbf', 'sigmoid', 'cosine'), 'rbf'),
+        gamma: Tuple[Tuple, float, bool] = ((3.0517578125e-05, 8), 0.01, True),
+        degree: Tuple[Tuple, int] = ((2, 5), 3),
+        coef0: Tuple[Tuple, float] = ((-1, 1), 0)
+    ) -> ConfigurationSpace:
+
+        if dataset_properties is not None:
+            n_features = len(dataset_properties['numerical_columns'])
+            n_components = ((floor(n_components[0][0] * n_features), ceil(n_components[0][1] * n_features)),
+                            ceil(n_components[1] * n_features))
+        else:
+            n_components = ((10, 2000), 100)
+
+        n_components = UniformIntegerHyperparameter(
+            "n_components", lower=n_components[0][0], upper=n_components[0][1], default_value=n_components[1])
+        kernel_hp = CategoricalHyperparameter('kernel', choices=kernel[0], default_value=kernel[1])
+        gamma = UniformFloatHyperparameter(
+            "gamma",
+            lower=gamma[0][0], upper=gamma[0][1],
+            log=gamma[2],
+            default_value=gamma[1],
+        )
+        coef0 = UniformFloatHyperparameter("coef0", lower=coef0[0][0], upper=coef0[0][1], default_value=coef0[1])
+        cs = ConfigurationSpace()
+        cs.add_hyperparameters([n_components, kernel_hp, gamma, coef0])
+
+        if "poly" in kernel_hp.choices:
+            degree = UniformIntegerHyperparameter('degree', lower=degree[0][0], upper=degree[0][1],
+                                                  default_value=degree[1])
+            cs.add_hyperparameters([degree])
+            degree_depends_on_poly = EqualsCondition(degree, kernel_hp, "poly")
+            cs.add_conditions([degree_depends_on_poly])
+        kernels = []
+        if "sigmoid" in kernel_hp.choices:
+            kernels.append("sigmoid")
+        if "poly" in kernel_hp.choices:
+            kernels.append("poly")
+        coef0_condition = InCondition(coef0, kernel_hp, kernels)
+        kernels = []
+        if "rbf" in kernel_hp.choices:
+            kernels.append("rbf")
+        if "poly" in kernel_hp.choices:
+            kernels.append("poly")
+        gamma_condition = InCondition(gamma, kernel_hp, kernels)
+        cs.add_conditions([coef0_condition, gamma_condition])
+        return cs
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'KernelPCA',
+                'name': 'Kernel Principal Component Analysis',
+                'handles_sparse': True
+                }
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py
new file mode 100644
index 000000000..85e11973d
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py
@@ -0,0 +1,52 @@
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class NoFeaturePreprocessor(autoPyTorchFeaturePreprocessingComponent):
+    """
+    Don't perform feature preprocessing on numerical features
+    """
+    def __init__(self,
+                 random_state: Optional[Union[np.random.RandomState, int]] = None
+                 ):
+        super().__init__()
+        self.random_state = random_state
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchFeaturePreprocessingComponent:
+        """
+        The fit function calls the fit function of the underlying model
+        and returns the transformed array.
+        Args:
+            X (np.ndarray): input features
+            y (Optional[np.ndarray]): input labels
+
+        Returns:
+            instance of self
+        """
+        self.check_requirements(X, y)
+
+        return self
+
+    def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Adds the preprocessor into the 'X' dictionary and returns it.
+        Args:
+            X (Dict[str, Any]): 'X' dictionary
+
+        Returns:
+            (Dict[str, Any]): the updated 'X' dictionary
+        """
+        X.update({'feature_preprocessor': self.preprocessor})
+        return X
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]:
+        return {
+            'shortname': 'NoFeaturePreprocessing',
+            'name': 'No Feature Preprocessing',
+            'handles_sparse': True
+        }
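In the search-space methods above, every hyperparameter argument is a `(value_range, default)` tuple, and for `n_components` the range is expressed as a fraction of the number of numerical columns. A hedged sketch of how the resulting space can be sampled (the 20-column `dataset_properties` is made up):

# Illustrative only: with 20 numerical columns the fractional range
# (0.5, 0.9) for n_components becomes the integer range (10, 18).
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.KernelPCA import (
    KernelPCA,
)

dataset_properties = {'numerical_columns': list(range(20))}
cs = KernelPCA.get_hyperparameter_search_space(dataset_properties)
config = cs.sample_configuration()
preprocessor = KernelPCA(**config.get_dictionary())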
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
new file mode 100644
index 000000000..93be983e9
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py
@@ -0,0 +1,102 @@
+from math import ceil, floor
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.conditions import EqualsCondition, InCondition
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
+    UniformFloatHyperparameter,
+    UniformIntegerHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.kernel_approximation
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class Nystroem(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, n_components: int = 10,
+                 kernel: str = 'rbf', degree: int = 3,
+                 gamma: float = 0.01, coef0: float = 0.0,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 ) -> None:
+        self.n_components = n_components
+        self.kernel = kernel
+        self.degree = degree
+        self.gamma = gamma
+        self.coef0 = coef0
+        self.random_state = random_state
+        super().__init__()
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+
+        self.preprocessor['numerical'] = sklearn.kernel_approximation.Nystroem(
+            n_components=self.n_components, kernel=self.kernel,
+            degree=self.degree, gamma=self.gamma, coef0=self.coef0,
+            random_state=self.random_state)
+
+        return self
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        n_components: Tuple[Tuple, float, bool] = ((0.5, 0.9), 0.5, True),
+        kernel: Tuple[Tuple, str] = (('poly', 'rbf', 'sigmoid', 'cosine'), 'rbf'),
+        gamma: Tuple[Tuple, float, bool] = ((3.0517578125e-05, 8), 0.01, True),
+        degree: Tuple[Tuple, int] = ((2, 5), 3),
+        coef0: Tuple[Tuple, float] = ((-1, 1), 0)
+    ) -> ConfigurationSpace:
+
+        if dataset_properties is not None:
+            n_features = len(dataset_properties['numerical_columns'])
+            # if numerical features are 1, set log to False
+            if n_features == 1:
+                log = False
+            else:
+                log = n_components[2]
+            n_components = ((floor(n_components[0][0] * n_features), ceil(n_components[0][1] * n_features)),
+                            ceil(n_components[1] * n_features), log)
+        else:
+            n_components = ((10, 2000), 100, True)
+
+        n_components = UniformIntegerHyperparameter(
+            "n_components", lower=n_components[0][0], upper=n_components[0][1],
+            default_value=n_components[1], log=n_components[2])
+        kernel_hp = CategoricalHyperparameter('kernel', choices=kernel[0], default_value=kernel[1])
+        gamma = UniformFloatHyperparameter(
+            "gamma",
+            lower=gamma[0][0], upper=gamma[0][1],
+            log=gamma[2],
+            default_value=gamma[1],
+        )
+        degree = UniformIntegerHyperparameter('degree', lower=degree[0][0],
+                                              upper=degree[0][1], default_value=degree[1])
+        coef0 = UniformFloatHyperparameter("coef0", lower=coef0[0][0], upper=coef0[0][1], default_value=coef0[1])
+        cs = ConfigurationSpace()
+        cs.add_hyperparameters([n_components, kernel_hp, degree, gamma, coef0])
+
+        degree_depends_on_poly = EqualsCondition(degree, kernel_hp, "poly")
+        kernels = []
+        if "sigmoid" in kernel_hp.choices:
+            kernels.append("sigmoid")
+        if "poly" in kernel_hp.choices:
+            kernels.append("poly")
+        coef0_condition = InCondition(coef0, kernel_hp, kernels)
+        kernels = []
+        if "rbf" in kernel_hp.choices:
+            kernels.append("rbf")
+        if "poly" in kernel_hp.choices:
+            kernels.append("poly")
+        gamma_condition = InCondition(gamma, kernel_hp, kernels)
+        cs.add_conditions([degree_depends_on_poly, coef0_condition, gamma_condition])
+        return cs
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'Nystroem',
+                'name': 'Nystroem kernel approximation',
+                'handles_sparse': True
+                }
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
new file mode 100644
index 000000000..9f542acd0
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py
@@ -0,0 +1,61 @@
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
+    UniformIntegerHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.preprocessing
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, degree: int = 2, interaction_only: bool = False,
+                 include_bias: bool = False,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+        self.degree = degree
+        self.interaction_only = interaction_only
+        self.include_bias = include_bias
+
+        self.random_state = random_state
+        super().__init__()
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+
+        self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures(
+            degree=self.degree, interaction_only=self.interaction_only,
+            include_bias=self.include_bias)
+        return self
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'PolynomialFeatures',
+                'name': 'PolynomialFeatures',
+                'handles_sparse': True}
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        degree: Tuple[Tuple, int] = ((2, 3), 2),
+        interaction_only: Tuple[Tuple, bool] = ((True, False), False),
+        include_bias: Tuple[Tuple, bool] = ((True, False), False)
+    ) -> ConfigurationSpace:
+
+        degree = UniformIntegerHyperparameter("degree", lower=degree[0][0], upper=degree[0][1], default_value=degree[1])
+        interaction_only = CategoricalHyperparameter("interaction_only",
+                                                     choices=interaction_only[0],
+                                                     default_value=interaction_only[1])
+        include_bias = CategoricalHyperparameter("include_bias",
+                                                 choices=include_bias[0],
+                                                 default_value=include_bias[1])
+
+        cs = ConfigurationSpace()
+        cs.add_hyperparameters([degree, interaction_only, include_bias])
+
+        return cs
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
new file mode 100644
index 000000000..c02606c3d
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py
@@ -0,0 +1,49 @@
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.preprocessing
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, standardize: bool = True,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+        self.standardize = standardize
+
+        self.random_state = random_state
+        super().__init__()
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+        self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",
+                                                                                standardize=self.standardize,
+                                                                                copy=False)
+        return self
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'PowerTransformer',
+                'name': 'Power Transformer',
+                'handles_sparse': True}
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        standardize: Tuple[Tuple, bool] = ((True, False), True)
+    ) -> ConfigurationSpace:
+        standardize = CategoricalHyperparameter("standardize",
+                                                choices=standardize[0],
+                                                default_value=standardize[1])
+
+        cs = ConfigurationSpace()
+        cs.add_hyperparameters([standardize])
+
+        return cs
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
new file mode 100644
index 000000000..8f03e1880
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py
@@ -0,0 +1,74 @@
+from math import ceil, floor
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    UniformFloatHyperparameter,
+    UniformIntegerHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.kernel_approximation
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, n_components: int = 100,
+                 gamma: float = 1.0,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 ) -> None:
+        self.n_components = n_components
+        self.gamma = gamma
+        self.random_state = random_state
+        super().__init__()
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+
+        self.preprocessor['numerical'] = sklearn.kernel_approximation.RBFSampler(
+            self.gamma, self.n_components, self.random_state)
+        return self
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        n_components: Tuple[Tuple, float, bool] = ((0.5, 0.9), 0.5, True),
+        gamma: Tuple[Tuple, float, bool] = ((3.0517578125e-05, 8), 1.0, True),
+        degree: Tuple[Tuple, int] = ((2, 5), 3),
+        coef0: Tuple[Tuple, float] = ((-1, 1), 0)
+    ) -> ConfigurationSpace:
+
+        if dataset_properties is not None:
+            n_features = len(dataset_properties['numerical_columns'])
+            # if numerical features are 1, set log to False
+            if n_features == 1:
+                log = False
+            else:
+                log = n_components[2]
+            n_components = ((floor(n_components[0][0] * n_features), ceil(n_components[0][1] * n_features)),
+                            ceil(n_components[1] * n_features), log)
+        else:
+            n_components = ((10, 2000), 100, True)
+
+        n_components = UniformIntegerHyperparameter(
+            "n_components", lower=n_components[0][0], upper=n_components[0][1],
+            default_value=n_components[1], log=n_components[2])
+        gamma = UniformFloatHyperparameter(
+            "gamma",
+            lower=gamma[0][0], upper=gamma[0][1],
+            log=gamma[2],
+            default_value=gamma[1],
+        )
+        cs = ConfigurationSpace()
+        cs.add_hyperparameters([n_components, gamma])
+        return cs
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'KitchenSink',
+                'name': 'Random Kitchen Sinks',
+                'handles_sparse': True
+                }
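Nystroem and RandomKitchenSinks share the fraction-to-absolute resolution of `n_components`, including the guard that disables the log scale for a single numerical column (the scaled bounds collapse, so a log-scaled integer range is no longer well defined). The same logic, pulled out as a standalone sketch:

# Illustrative only: mirrors the n_components handling in the two
# components above, outside of any class.
from math import ceil, floor
from typing import Optional, Tuple


def resolve_n_components(n_components: Tuple[Tuple, float, bool],
                         n_features: Optional[int]) -> Tuple[Tuple, int, bool]:
    if n_features is None:
        # no dataset information: fall back to a fixed absolute range
        return (10, 2000), 100, True
    # with one feature the scaled bounds collapse; disable the log scale
    log = n_components[2] if n_features > 1 else False
    return ((floor(n_components[0][0] * n_features),
             ceil(n_components[0][1] * n_features)),
            ceil(n_components[1] * n_features), log)


assert resolve_n_components(((0.5, 0.9), 0.5, True), n_features=10) == ((5, 9), 5, True)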
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
new file mode 100644
index 000000000..558fdb4de
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py
@@ -0,0 +1,55 @@
+from math import floor
+from typing import Any, Dict, Optional, Tuple, Union
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import (
+    UniformIntegerHyperparameter,
+)
+
+import numpy as np
+
+import sklearn.decomposition
+from sklearn.base import BaseEstimator
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing\
+    .base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+
+class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
+    def __init__(self, target_dim: int = 128,
+                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+        self.target_dim = target_dim
+
+        self.random_state = random_state
+        super().__init__()
+
+    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
+
+        self.preprocessor['numerical'] = sklearn.decomposition.TruncatedSVD(self.target_dim, algorithm="randomized")
+
+        return self
+
+    @staticmethod
+    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
+        return {'shortname': 'TruncSVD',
+                'name': 'Truncated Singular Value Decomposition',
+                'handles_sparse': True}
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[Dict[str, str]] = None,
+        target_dim: Tuple[Tuple, float] = ((0.5, 0.9), 0.5),
+    ) -> ConfigurationSpace:
+        cs = ConfigurationSpace()
+
+        if dataset_properties is not None:
+            n_features = len(dataset_properties['numerical_columns'])
+            target_dim = ((floor(target_dim[0][0] * n_features), floor(target_dim[0][1] * n_features)),
+                          floor(target_dim[1] * n_features))
+        else:
+            target_dim = ((10, 256), 128)
+
+        target_dim = UniformIntegerHyperparameter("target_dim", lower=target_dim[0][0],
+                                                  upper=target_dim[0][1], default_value=target_dim[1])
+        cs.add_hyperparameters([target_dim])
+
+        return cs
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
new file mode 100644
index 000000000..8c85bbf30
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py
@@ -0,0 +1,27 @@
+from typing import Any, Dict, List
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
+    autoPyTorchTabularPreprocessingComponent
+)
+
+
+class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
+    _required_properties: List[str] = ['handles_sparse']
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Adds the fitted feature preprocessor into the 'X' dictionary and returns it.
+        Args:
+            X (Dict[str, Any]): 'X' dictionary
+
+        Returns:
+            (Dict[str, Any]): the updated 'X' dictionary
+        """
+        if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
+            raise AttributeError("{} can't transform without fitting first"
+                                 .format(self.__class__.__name__))
+        X.update({'feature_preprocessor': self.preprocessor})
+        return X
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py
new file mode 100644
index 000000000..56af71877
--- /dev/null
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor_choice.py
@@ -0,0 +1,116 @@
+import os
+from collections import OrderedDict
+from typing import Any, Dict, List, Optional
+
+import ConfigSpace.hyperparameters as CSH
+from ConfigSpace.configuration_space import ConfigurationSpace
+
+from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
+from autoPyTorch.pipeline.components.base_component import (
+    ThirdPartyComponents,
+    autoPyTorchComponent,
+    find_components,
+)
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
+    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
+
+preprocessing_directory = os.path.split(__file__)[0]
+_preprocessors = find_components(__package__,
+                                 preprocessing_directory,
+                                 autoPyTorchFeaturePreprocessingComponent)
+_addons = ThirdPartyComponents(autoPyTorchFeaturePreprocessingComponent)
+
+
+def add_feature_preprocessor(feature_preprocessor: autoPyTorchFeaturePreprocessingComponent) -> None:
+    _addons.add_component(feature_preprocessor)
+
+
+class FeatureProprocessorChoice(autoPyTorchChoice):
+    """
+    Allows for dynamically choosing feature_preprocessor component at runtime
+    """
+
+    def get_components(self) -> Dict[str, autoPyTorchComponent]:
+        """Returns the available feature_preprocessor components
+
+        Args:
+            None
+
+        Returns:
+            Dict[str, autoPyTorchComponent]: all feature preprocessor components available
+                as choices for preprocessing the numerical columns
+        """
+        components: Dict = OrderedDict()
+        components.update(_preprocessors)
+        components.update(_addons.components)
+        return components
+
+    def get_hyperparameter_search_space(self,
+                                        dataset_properties: Optional[Dict[str, Any]] = None,
+                                        default: Optional[str] = None,
+                                        include: Optional[List[str]] = None,
+                                        exclude: Optional[List[str]] = None) -> ConfigurationSpace:
+        cs = ConfigurationSpace()
+
+        if dataset_properties is None:
+            dataset_properties = dict()
+
+        dataset_properties = {**self.dataset_properties, **dataset_properties}
+
+        available_ = self.get_available_components(dataset_properties=dataset_properties,
+                                                   include=include,
+                                                   exclude=exclude)
+
+        if len(available_) == 0:
+            raise ValueError("no feature preprocessors found, please add a feature preprocessor")
+
+        if default is None:
+            defaults = ['NoFeaturePreprocessor',
+                        'FastICA',
+                        'KernelPCA',
+                        'RandomKitchenSinks',
+                        'Nystroem',
+                        'PolynomialFeatures',
+                        'PowerTransformer',
+                        'TruncatedSVD',
+                        ]
+            for default_ in defaults:
+                if default_ in available_:
+                    if include is not None and default_ not in include:
+                        continue
+                    if exclude is not None and default_ in exclude:
+                        continue
+                    default = default_
+                    break
+
+        # add only no feature preprocessor to choice hyperparameters in case the dataset is only categorical
+        if len(dataset_properties['numerical_columns']) == 0:
+            default = 'NoFeaturePreprocessor'
+            if include is not None and default not in include:
+                raise ValueError("Provided {} in include, however, "
+                                 "the dataset is incompatible with it".format(include))
+            preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                         ['NoFeaturePreprocessor'],
+                                                         default_value=default)
+        else:
+            # Truncated SVD requires n_features > n_components
+            if len(dataset_properties['numerical_columns']) == 1:
+                del available_['TruncatedSVD']
+            preprocessor = CSH.CategoricalHyperparameter('__choice__',
+                                                         list(available_.keys()),
+                                                         default_value=default)
+
+        cs.add_hyperparameter(preprocessor)
+
+        # add only child hyperparameters of the feature preprocessor choices
+        for name in preprocessor.choices:
+            updates = self._get_search_space_updates(prefix=name)
+            config_space = available_[name].get_hyperparameter_search_space(dataset_properties,  # type:ignore
+                                                                            **updates)
+            parent_hyperparameter = {'parent': preprocessor, 'value': name}
+            cs.add_configuration_space(name, config_space,
+                                       parent_hyperparameter=parent_hyperparameter)
+
+        self.configuration_space = cs
+        self.dataset_properties = dataset_properties
+        return cs
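As with encoders and scalers, third-party components can be registered through `add_feature_preprocessor` and are then picked up by `FeatureProprocessorChoice.get_components`. A hedged sketch; `MyPreprocessor` is hypothetical and must implement the full component interface (fit, get_properties, get_hyperparameter_search_space) before registration:

# Illustrative only: MyPreprocessor is a hypothetical component.
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
    base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
    base_feature_preprocessor_choice import add_feature_preprocessor


class MyPreprocessor(autoPyTorchFeaturePreprocessingComponent):
    ...  # fit, get_properties and get_hyperparameter_search_space go here


add_feature_preprocessor(MyPreprocessor)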
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py
index 718c80d39..6fdcc47bc 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py
@@ -74,6 +74,9 @@ def get_hyperparameter_search_space(self,
         # add only no scaler to choice hyperparameters in case the dataset is only categorical
         if len(dataset_properties['numerical_columns']) == 0:
             default = 'NoScaler'
+            if include is not None and default not in include:
+                raise ValueError("Provided {} in include, however, "
+                                 "the dataset is incompatible with it".format(include))
             preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                          ['NoScaler'],
                                                          default_value=default)
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
index 5d6def24a..6053bdf6d 100644
--- a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
+++ b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
@@ -31,7 +31,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "EarlyPreprocessing":

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         transforms = get_preprocess_transforms(X)
-
         if X['dataset_properties']['is_small_preprocess']:
             if 'X_train' in X:
                 X_train = X['X_train']
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
index d355005e8..2557e92b8 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
@@ -1,5 +1,11 @@
 from abc import abstractmethod
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, Iterable, Optional, Tuple
+
+import numpy as np
+
+import pandas as pd
+
+from scipy.sparse import csr_matrix

 import torch
 from torch import nn
@@ -8,6 +14,7 @@
 from autoPyTorch.pipeline.components.base_component import (
     autoPyTorchComponent,
 )
+from autoPyTorch.utils.common import FitRequirement


 class NetworkBackboneComponent(autoPyTorchComponent):
@@ -19,8 +26,15 @@ class NetworkBackboneComponent(autoPyTorchComponent):
     def __init__(self,
                  **kwargs: Any):
         super().__init__()
+        self.add_fit_requirements([
+            FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True),
+            FitRequirement('X_train', (np.ndarray, pd.DataFrame, csr_matrix), user_defined=True,
+                           dataset_property=False),
+            FitRequirement('input_shape', (Iterable,), user_defined=True, dataset_property=True),
+            FitRequirement('tabular_transformer', (BaseEstimator,), user_defined=False, dataset_property=False)])
         self.backbone: nn.Module = None
         self.config = kwargs
+        self.input_shape: Optional[Iterable] = None

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         """
@@ -32,8 +46,17 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         Returns:
             Self
         """
+        self.check_requirements(X, y)
+        X_train = X['X_train']

-        input_shape = X['X_train'].shape[1:]
+        if X["dataset_properties"]["is_small_preprocess"]:
+            input_shape = X_train.shape[1:]
+        else:
+            # get input shape by transforming the first element of the training set
+            column_transformer = X['tabular_transformer'].preprocessor
+            input_shape = column_transformer.transform(X_train[:1]).shape[1:]
+
+        self.input_shape = input_shape

         self.backbone = self.build_backbone(
             input_shape=input_shape,
@@ -42,13 +65,15 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Adds the network head into the fit dictionary 'X' and returns it.
-
+        Adds the network backbone into the fit dictionary 'X' and returns it.
+        Also updates the input shape, as from this point on only the shape of
+        the transformed dataset is used.
         Args:
             X (Dict[str, Any]): 'X' dictionary

         Returns:
             (Dict[str, Any]): the updated 'X' dictionary
         """
+        X['dataset_properties'].update({'input_shape': self.input_shape})
         X.update({'network_backbone': self.backbone})
         return X
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py
index 45a96a362..c20ca5ed2 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py
@@ -50,7 +50,7 @@ def forward(
     def backward(ctx: typing.Any,
                  grad_output: torch.Tensor
                  ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        x1, x2, alpha, beta = ctx.saved_variables
+        x1, x2, alpha, beta = ctx.saved_tensors
         grad_x1 = grad_x2 = grad_alpha = grad_beta = None

         if ctx.needs_input_grad[0]:
@@ -81,7 +81,7 @@ def forward(ctx: typing.Any,
     def backward(ctx: typing.Any,
                  grad_output: torch.Tensor
                  ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        x, alpha, beta, bl = ctx.saved_variables
+        x, alpha, beta, bl = ctx.saved_tensors
         grad_x = grad_alpha = grad_beta = grad_bl = None

         if ctx.needs_input_grad[0]:
diff --git a/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py b/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py
index be2a9c7dc..ced7630fa 100644
--- a/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py
+++ b/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py
@@ -1,11 +1,12 @@
 from abc import abstractmethod
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, Iterable, Tuple

 import torch.nn as nn

 from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.base_component import BaseEstimator, autoPyTorchComponent
 from autoPyTorch.pipeline.components.setup.network_backbone.utils import get_output_shape
+from autoPyTorch.utils.common import FitRequirement


 class NetworkHeadComponent(autoPyTorchComponent):
@@ -17,6 +18,12 @@ class NetworkHeadComponent(autoPyTorchComponent):
     def __init__(self,
                  **kwargs: Any):
         super().__init__()
+        self.add_fit_requirements([
+            FitRequirement('input_shape', (Iterable,), user_defined=True, dataset_property=True),
+            FitRequirement('num_classes', (int,), user_defined=True, dataset_property=True),
+            FitRequirement('task_type', (str,), user_defined=True, dataset_property=True),
+            FitRequirement('output_shape', (Iterable, int), user_defined=True, dataset_property=True),
+        ])
         self.head: nn.Module = None
         self.config = kwargs

@@ -30,7 +37,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         Returns:
             Self
         """
-        input_shape = X['X_train'].shape[1:]
+        input_shape = X['dataset_properties']['input_shape']
         output_shape = (X['dataset_properties']['num_classes'],) if \
             STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] in \
             CLASSIFICATION_TASKS else X['dataset_properties']['output_shape']
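The backbone now asks the fitted column transformer for the width of the transformed data whenever the dataset was too large to preprocess up front; transforming a single row is enough, since only `shape[1:]` is read. A sketch of the two paths in `NetworkBackboneComponent.fit`:

# Illustrative only: mirrors the input-shape logic added above.
def infer_input_shape(X):
    X_train = X['X_train']
    if X['dataset_properties']['is_small_preprocess']:
        # data was already transformed in memory: read the shape directly
        return X_train.shape[1:]
    # otherwise transform one row with the fitted TabularColumnTransformer
    column_transformer = X['tabular_transformer'].preprocessor
    return column_transformer.transform(X_train[:1]).shape[1:]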
diff --git a/autoPyTorch/pipeline/components/training/losses.py b/autoPyTorch/pipeline/components/training/losses.py
index 705246310..84d914f1c 100644
--- a/autoPyTorch/pipeline/components/training/losses.py
+++ b/autoPyTorch/pipeline/components/training/losses.py
@@ -47,7 +47,7 @@ def get_supported_losses(task: int, output_type: int) -> Dict[str, Type[Loss]]:
     return supported_losses


-def get_loss_instance(dataset_properties: Dict[str, Any], name: Optional[str] = None) -> Loss:
+def get_loss_instance(dataset_properties: Dict[str, Any], name: Optional[str] = None) -> Type[Loss]:
     assert 'task_type' in dataset_properties, \
         "Expected dataset_properties to have task_type got {}".format(dataset_properties.keys())
     assert 'output_type' in dataset_properties, \
@@ -68,4 +68,4 @@ def get_loss_instance(dataset_properties: Dict[str, Any], name: Optional[str] =
     else:
         loss = get_default(task)

-    return loss()
+    return loss
diff --git a/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py
index a49e17682..5bcf0f861 100644
--- a/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py
@@ -2,6 +2,7 @@

 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
+    CategoricalHyperparameter,
     UniformFloatHyperparameter,
 )

@@ -9,11 +10,13 @@

 import torch

+from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent


 class MixUpTrainer(BaseTrainerComponent):
-    def __init__(self, alpha: float, random_state: typing.Optional[np.random.RandomState] = None):
+    def __init__(self, alpha: float, weighted_loss: bool = False,
+                 random_state: typing.Optional[np.random.RandomState] = None):
         """
         This class handles the training of a network for a single given epoch.
@@ -22,6 +25,7 @@ def __init__(self, alpha: float, random_state: typing.Optional[np.random.RandomS

         """
         super().__init__(random_state=random_state)
+        self.weighted_loss = weighted_loss
         self.alpha = alpha

     def data_preparation(self, X: np.ndarray, y: np.ndarray,
@@ -62,10 +66,16 @@ def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.A

     @staticmethod
     def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.Dict] = None,
-                                        alpha: typing.Tuple[typing.Tuple[float, float], float] = ((0, 1), 0.2)
+                                        alpha: typing.Tuple[typing.Tuple[float, float], float] = ((0, 1), 0.2),
+                                        weighted_loss: typing.Tuple[typing.Tuple, bool] = ((True, False), True)
                                         ) -> ConfigurationSpace:
         alpha = UniformFloatHyperparameter(
             "alpha", alpha[0][0], alpha[0][1], default_value=alpha[1])
+        weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0],
+                                                  default_value=weighted_loss[1])
         cs = ConfigurationSpace()
         cs.add_hyperparameters([alpha])
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                cs.add_hyperparameters([weighted_loss])
         return cs
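`get_loss_instance` now hands back the loss class instead of an instance, so the trainer can construct it later with class-weight arguments (see `BaseTrainerComponent.prepare` below). A minimal sketch of the new calling convention; the returned class depends on the task and output type:

# Illustrative only: the loss is instantiated by the caller, which makes
# it possible to pass weight arguments at construction time.
from autoPyTorch.pipeline.components.training.losses import get_loss_instance

dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'binary'}
loss_class = get_loss_instance(dataset_properties)
criterion = loss_class()  # instantiated by the caller, e.g. with weights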
diff --git a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py
index 454d4c625..dbd190c59 100644
--- a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py
@@ -1,13 +1,27 @@
 import typing

 from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import CategoricalHyperparameter

 import numpy as np

+from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent


 class StandardTrainer(BaseTrainerComponent):
+    def __init__(self, weighted_loss: bool = False,
+                 random_state: typing.Optional[np.random.RandomState] = None):
+        """
+        This class handles the training of a network for a single given epoch.
+
+        Args:
+            weighted_loss (bool): whether to use weighted loss
+
+        """
+        super().__init__(random_state=random_state)
+        self.weighted_loss = weighted_loss
+
     def data_preparation(self, X: np.ndarray, y: np.ndarray,
                          ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]:
         """
@@ -40,7 +54,13 @@ def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.A

     @staticmethod
     def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.Dict] = None,
-                                        **kwargs: typing.Any
+                                        weighted_loss: typing.Tuple[typing.Tuple, bool] = ((True, False), True)
                                         ) -> ConfigurationSpace:
+        weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0],
+                                                  default_value=weighted_loss[1])
         cs = ConfigurationSpace()
+        if dataset_properties is not None:
+            if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS:
+                cs.add_hyperparameters([weighted_loss])
+
         return cs
diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
index 64e30ae45..69665007e 100644
--- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -3,14 +3,18 @@

 import numpy as np

+import pandas as pd
+
 import torch
 from torch.autograd import Variable
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.tensorboard.writer import SummaryWriter

+from autoPyTorch.constants import BINARY
 from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent
 from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score
+from autoPyTorch.utils.implementations import get_loss_weight_strategy
 from autoPyTorch.utils.logging_ import PicklableClientLogger

@@ -165,6 +169,7 @@ class BaseTrainerComponent(autoPyTorchTrainingComponent):
     def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None) -> None:
         super().__init__()
         self.random_state = random_state
+        self.weighted_loss: bool = False

     def prepare(
         self,
@@ -176,7 +181,9 @@ def prepare(
         device: torch.device,
         metrics_during_training: bool,
         scheduler: _LRScheduler,
-        task_type: int
+        task_type: int,
+        output_type: int,
+        labels: Union[np.ndarray, torch.Tensor, pd.DataFrame]
     ) -> None:

         # Save the device to be used
@@ -185,8 +192,20 @@ def prepare(
         # Setup the metrics
         self.metrics = metrics

+        # Weights for the loss function
+        weights = None
+        kwargs = {}
+        if self.weighted_loss:
+            weights = self.get_class_weights(output_type, labels)
+            if output_type == BINARY:
+                kwargs['pos_weight'] = weights
+            else:
+                kwargs['weight'] = weights
+
+        criterion = criterion(**kwargs) if weights is not None else criterion()
+
         # Setup the loss function
-        self.criterion = criterion.to(device)
+        self.criterion = criterion

         # setup the model
         self.model = model.to(device)
@@ -245,7 +264,6 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int,
         targets_data = list()

         for step, (data, targets) in enumerate(train_loader):
-
             if self.budget_tracker.is_max_time_reached():
                 logger.info("Stopping training as max time reached")
                 break
@@ -358,6 +376,14 @@ def compute_metrics(self, outputs_data: np.ndarray, targets_data: np.ndarray
         targets_data = torch.cat(targets_data, dim=0)
         return calculate_score(targets_data, outputs_data, self.task_type, self.metrics)

+    def get_class_weights(self, output_type: int, labels: Union[np.ndarray, torch.Tensor, pd.DataFrame]
+                          ) -> np.ndarray:
+        strategy = get_loss_weight_strategy(output_type)
+        weights = strategy(y=labels)
+        weights = torch.from_numpy(weights)
+        weights = weights.type(torch.FloatTensor).to(self.device)
+        return weights
+
     def data_preparation(self, X: np.ndarray, y: np.ndarray,
                          ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
         """
diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py
index 92243311c..e6a630fb6 100755
--- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py
+++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py
@@ -19,7 +19,7 @@
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.tensorboard.writer import SummaryWriter

-from autoPyTorch.constants import STRING_TO_TASK_TYPES
+from autoPyTorch.constants import STRING_TO_OUTPUT_TYPES, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
 from autoPyTorch.pipeline.components.base_component import (
     ThirdPartyComponents,
@@ -56,6 +56,7 @@ class TrainerChoice(autoPyTorchChoice):
     epoch happens, that is, how batches of data are fed and used to
     train the network.
     """
+
     def __init__(self,
                  dataset_properties: Dict[str, Any],
                  random_state: Optional[np.random.RandomState] = None
@@ -97,11 +98,11 @@ def get_components(self) -> Dict[str, autoPyTorchComponent]:
         return components

     def get_hyperparameter_search_space(
-            self,
-            dataset_properties: Optional[Dict[str, str]] = None,
-            default: Optional[str] = None,
-            include: Optional[List[str]] = None,
-            exclude: Optional[List[str]] = None,
+        self,
+        dataset_properties: Optional[Dict[str, str]] = None,
+        default: Optional[str] = None,
+        include: Optional[List[str]] = None,
+        exclude: Optional[List[str]] = None,
     ) -> ConfigurationSpace:
         """Returns the configuration space of the current chosen components

@@ -121,6 +122,8 @@ def get_hyperparameter_search_space(
         if dataset_properties is None:
             dataset_properties = {}

+        dataset_properties = {**self.dataset_properties, **dataset_properties}
+
         # Compile a list of legal trainers for this problem
         available_trainers = self.get_available_components(
             dataset_properties=dataset_properties,
@@ -270,7 +273,9 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Modu
             device=self.get_device(X),
             metrics_during_training=X['metrics_during_training'],
             scheduler=X['lr_scheduler'],
-            task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']]
+            task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']],
+            output_type=STRING_TO_OUTPUT_TYPES[X['dataset_properties']['output_type']],
+            labels=X['y_train'][X['backend'].load_datamanager().splits[X['split_id']][0]]
         )
         total_parameter_count, trainable_parameter_count = self.count_parameters(X['network'])
         self.run_summary = RunSummary(
@@ -490,6 +495,10 @@ def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
                     config_option
                 ))

+        # For early stopping, we need to know the patience
+        if 'early_stopping' not in X:
+            raise ValueError('To fit a Trainer, expected fit dictionary to have early_stopping')
+
     def get_device(self, X: Dict[str, Any]) -> torch.device:
         """
         Returns the device to do torch operations
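When `weighted_loss` is enabled, `prepare` passes the computed weights into the loss constructor: binary outputs use `pos_weight` (the BCE convention), everything else the generic `weight` argument. The same construction outside the trainer, with made-up weights:

# Illustrative only: the two keyword conventions used by prepare().
import torch

pos_weight = torch.tensor([3.0])  # e.g. three negatives per positive
binary_criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

class_weights = torch.tensor([0.5, 2.0])
multiclass_criterion = torch.nn.CrossEntropyLoss(weight=class_weights)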
diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py
index 5059f3536..3540d9660 100644
--- a/autoPyTorch/pipeline/tabular_classification.py
+++ b/autoPyTorch/pipeline/tabular_classification.py
@@ -16,6 +16,8 @@
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import (
     EncoderChoice
 )
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \
+    base_feature_preprocessor_choice import FeatureProprocessorChoice
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice
 from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing
@@ -59,25 +61,25 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline):
     """

     def __init__(
-            self,
-            config: Optional[Configuration] = None,
-            steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
-            dataset_properties: Optional[Dict[str, Any]] = None,
-            include: Optional[Dict[str, Any]] = None,
-            exclude: Optional[Dict[str, Any]] = None,
-            random_state: Optional[np.random.RandomState] = None,
-            init_params: Optional[Dict[str, Any]] = None,
-            search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        self,
+        config: Optional[Configuration] = None,
+        steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
+        dataset_properties: Optional[Dict[str, Any]] = None,
+        include: Optional[Dict[str, Any]] = None,
+        exclude: Optional[Dict[str, Any]] = None,
+        random_state: Optional[np.random.RandomState] = None,
+        init_params: Optional[Dict[str, Any]] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
     ):
         super().__init__(
             config, steps, dataset_properties, include, exclude,
             random_state, init_params, search_space_updates)

     def fit_transformer(
-            self,
-            X: np.ndarray,
-            y: np.ndarray,
-            fit_params: Optional[Dict[str, Any]] = None
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        fit_params: Optional[Dict[str, Any]] = None
     ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]:
         """Fits the pipeline given a training (X,y) pair
@@ -239,6 +241,7 @@ def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
             ("imputer", SimpleImputer()),
             ("encoder", EncoderChoice(default_dataset_properties)),
             ("scaler", ScalerChoice(default_dataset_properties)),
+            ("feature_preprocessor", FeatureProprocessorChoice(default_dataset_properties)),
             ("tabular_transformer", TabularColumnTransformer()),
             ("preprocessing", EarlyPreprocessing()),
             ("network_backbone", NetworkBackboneChoice(default_dataset_properties)),
diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py
index 3143ced11..88af11531 100644
--- a/autoPyTorch/utils/common.py
+++ b/autoPyTorch/utils/common.py
@@ -133,3 +133,9 @@ def hash_array_or_matrix(X: Union[np.ndarray, pd.DataFrame]) -> str:
         hash = m.hexdigest()

     return hash
+
+
+def subsampler(data: Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix],
+               x: Union[np.ndarray, List[int]]
+               ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix]:
+    return data[x] if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix)) else data.iloc[x]
diff --git a/autoPyTorch/utils/implementations.py b/autoPyTorch/utils/implementations.py
new file mode 100644
index 000000000..15f1758e1
--- /dev/null
+++ b/autoPyTorch/utils/implementations.py
@@ -0,0 +1,48 @@
+from typing import Callable, Union
+
+import numpy as np
+
+import torch
+
+from autoPyTorch.constants import BINARY
+
+
+def get_loss_weight_strategy(output_type: int) -> Callable:
+    if output_type == BINARY:
+        return LossWeightStrategyWeightedBinary()
+    else:
+        return LossWeightStrategyWeighted()
+
+
+class LossWeightStrategyWeighted():
+    def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
+        if isinstance(y, torch.Tensor):
+            y = y.detach().cpu().numpy() if y.is_cuda else y.numpy()
+        if isinstance(y[0], str):
+            y = y.astype('float64')
+        counts = np.sum(y, axis=0)
+        total_weight = y.shape[0]
+
+        if len(y.shape) > 1:
+            weight_per_class = total_weight / y.shape[1]
+            weights = (np.ones(y.shape[1]) * weight_per_class) / np.maximum(counts, 1)
+        else:
+            classes, counts = np.unique(y, axis=0, return_counts=True)
+            classes, counts = classes[::-1], counts[::-1]
+            weight_per_class = total_weight / classes.shape[0]
+            weights = (np.ones(classes.shape[0]) * weight_per_class) / counts
+
+        return weights
+
+
+class LossWeightStrategyWeightedBinary():
+    def __call__(self, y: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
+        if isinstance(y, torch.Tensor):
+            y = y.detach().cpu().numpy() if y.is_cuda else y.numpy()
+        if isinstance(y[0], str):
+            y = y.astype('float64')
+        counts_one = np.sum(y, axis=0)
+        counts_zero = y.shape[0] - counts_one
+        weights = counts_zero / np.maximum(counts_one, 1)
+
+        return np.array(weights)
diff --git a/requirements.txt b/requirements.txt
index 366837cd6..cced8bcf4 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,8 @@
 pandas
 torch
 torchvision
+tensorboard
 scikit-learn>=0.22.0,<0.23
-torchvision
-pytorch-lightning
 numpy
 scipy
 lockfile
@@ -16,4 +15,4 @@ dask
 distributed>=2.2.0
 catboost
 lightgbm
-
+flaky
diff --git a/setup.py b/setup.py
index 07ab30a8c..c496a48c1 100755
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 # noinspection PyInterpreter
 setuptools.setup(
     name="autoPyTorch",
-    version="0.0.2",
+    version="0.0.3",
     author="AutoML Freiburg",
     author_email="zimmerl@informatik.uni-freiburg.de",
     description=("Auto-PyTorch searches neural architectures using BO-HB"),
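A worked example of the two weighting strategies on a small, imbalanced label vector (the numbers are illustrative):

# Illustrative only: expected weights for an 8-sample binary problem.
import numpy as np

from autoPyTorch.utils.implementations import (
    LossWeightStrategyWeighted,
    LossWeightStrategyWeightedBinary,
)

y = np.array([0, 0, 0, 0, 0, 0, 1, 1])  # six zeros, two ones

# pos_weight for BCEWithLogitsLoss: counts_zero / counts_one = 6 / 2 = 3.0
LossWeightStrategyWeightedBinary()(y=y)

# per-class weights: (n_samples / n_classes) divided by each class count
LossWeightStrategyWeighted()(y=y)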
diff --git a/requirements.txt b/requirements.txt
index 366837cd6..cced8bcf4 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,8 @@
 pandas
 torch
 torchvision
+tensorboard
 scikit-learn>=0.22.0,<0.23
-torchvision
-pytorch-lightning
 numpy
 scipy
 lockfile
@@ -16,4 +15,4 @@ dask
 distributed>=2.2.0
 catboost
 lightgbm
-
+flaky
diff --git a/setup.py b/setup.py
index 07ab30a8c..c496a48c1 100755
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 # noinspection PyInterpreter
 setuptools.setup(
     name="autoPyTorch",
-    version="0.0.2",
+    version="0.0.3",
     author="AutoML Freiburg",
     author_email="zimmerl@informatik.uni-freiburg.de",
     description=("Auto-PyTorch searches neural architectures using BO-HB"),
diff --git a/test/conftest.py b/test/conftest.py
index 8ef3cc28f..d16d40546 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -156,30 +156,29 @@ def fit_dictionary(request):
 def fit_dictionary_numerical_only(backend):
     X, y = make_classification(
         n_samples=200,
-        n_features=4,
-        n_informative=3,
-        n_redundant=1,
+        n_features=10,
+        n_informative=6,
+        n_redundant=4,
         n_repeated=0,
         n_classes=2,
         n_clusters_per_class=2,
         shuffle=True,
         random_state=0
     )
+    X = X.astype('float64')
     datamanager = TabularDataset(
         X=X, Y=y,
         X_test=X, Y_test=y,
     )
-    info = {'task_type': datamanager.task_type,
-            'output_type': datamanager.output_type,
-            'issparse': datamanager.issparse,
-            'numerical_columns': datamanager.numerical_columns,
-            'categorical_columns': datamanager.categorical_columns}
+    info = datamanager.get_required_dataset_info()
     dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info))
     fit_dictionary = {
         'X_train': X,
         'y_train': y,
+        'train_indices': datamanager.splits[0][0],
+        'val_indices': datamanager.splits[0][1],
         'dataset_properties': dataset_properties,
         'num_run': np.random.randint(50),
         'device': 'cpu',
@@ -209,16 +208,14 @@ def fit_dictionary_categorical_only(backend):
         X=X, Y=y,
         X_test=X, Y_test=y,
     )
-    info = {'task_type': datamanager.task_type,
-            'output_type': datamanager.output_type,
-            'issparse': datamanager.issparse,
-            'numerical_columns': datamanager.numerical_columns,
-            'categorical_columns': datamanager.categorical_columns}
+    info = datamanager.get_required_dataset_info()
     dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info))
     fit_dictionary = {
         'X_train': X,
         'y_train': y,
+        'train_indices': datamanager.splits[0][0],
+        'val_indices': datamanager.splits[0][1],
         'dataset_properties': dataset_properties,
         'num_run': np.random.randint(50),
         'device': 'cpu',
@@ -250,17 +247,15 @@ def fit_dictionary_num_and_categorical(backend):
         X=X, Y=y,
         X_test=X, Y_test=y,
     )
-    info = {'task_type': datamanager.task_type,
-            'output_type': datamanager.output_type,
-            'issparse': datamanager.issparse,
-            'numerical_columns': datamanager.numerical_columns,
-            'categorical_columns': datamanager.categorical_columns}
+    info = datamanager.get_required_dataset_info()
     dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info))
     fit_dictionary = {
         'X_train': X,
         'y_train': y,
+        'train_indices': datamanager.splits[0][0],
+        'val_indices': datamanager.splits[0][1],
         'dataset_properties': dataset_properties,
         'num_run': np.random.randint(50),
         'device': 'cpu',
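The fixtures now carry the full training data plus split indices, and components slice what they need during fit; a minimal sketch of that contract (illustrative values, not from the diff):

    import numpy as np

    fit_dictionary = {
        'X_train': np.random.random((10, 3)),
        'train_indices': np.arange(5),       # cf. datamanager.splits[0][0]
        'val_indices': np.arange(5, 10),     # cf. datamanager.splits[0][1]
    }
    # what a preprocessing component fits on after subsampling
    X_fit = fit_dictionary['X_train'][fit_dictionary['train_indices']]
    assert X_fit.shape == (5, 3)
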
diff --git a/test/test_datasets/test_tabular_dataset.py b/test/test_datasets/test_tabular_dataset.py
index dfc72be77..6d5dacd8d 100644
--- a/test/test_datasets/test_tabular_dataset.py
+++ b/test/test_datasets/test_tabular_dataset.py
@@ -98,11 +98,7 @@ def test_get_dataset_properties(self):
         backend.save_datamanager(datamanager)
         datamanager = backend.load_datamanager()

-        info = {'task_type': datamanager.task_type,
-                'output_type': datamanager.output_type,
-                'issparse': datamanager.issparse,
-                'numerical_columns': datamanager.numerical_columns,
-                'categorical_columns': datamanager.categorical_columns}
+        info = datamanager.get_required_dataset_info()
         dataset_requirements = get_dataset_requirements(info)
         dataset_properties = datamanager.get_dataset_properties(dataset_requirements)
diff --git a/test/test_pipeline/components/base.py b/test/test_pipeline/components/base.py
index 120fa9fcd..6ad3ad824 100644
--- a/test/test_pipeline/components/base.py
+++ b/test/test_pipeline/components/base.py
@@ -1,13 +1,22 @@
 import logging
 import unittest
+from typing import Any, Dict, List, Optional, Tuple

 from sklearn.datasets import make_classification

 import torch

-from autoPyTorch.constants import STRING_TO_TASK_TYPES
+from autoPyTorch.constants import STRING_TO_OUTPUT_TYPES, STRING_TO_TASK_TYPES
+from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import \
+    TabularColumnTransformer
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import \
+    EncoderChoice
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice
 from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics
 from autoPyTorch.pipeline.components.training.trainer.base_trainer import BudgetTracker
+from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline


 class BaseTraining(unittest.TestCase):
@@ -40,7 +49,7 @@ def setUp(self):
         layers.append(torch.nn.Sigmoid())
         layers.append(torch.nn.Linear(4, 2))
         self.model = torch.nn.Sequential(*layers)
-        self.criterion = torch.nn.CrossEntropyLoss()
+        self.criterion = torch.nn.CrossEntropyLoss
         self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)
         self.device = torch.device('cpu')
         self.logger = logging.getLogger('test')
@@ -51,9 +60,13 @@ def setUp(self):
             max_epochs=self.epochs,
         )
         self.task_type = STRING_TO_TASK_TYPES[self.dataset_properties['task_type']]
+        self.output_type = STRING_TO_OUTPUT_TYPES[self.dataset_properties['output_type']]

     def _overfit_model(self):
         self.model.train()
+        # initialise the criterion as it is
+        # not being done in __init__
+        self.criterion = self.criterion()
         for epoch in range(self.epochs):
             total_loss = 0
             for x, y in self.loader:
@@ -67,3 +80,29 @@ def _overfit_model(self):
                 # Backward pass
                 loss.backward()
                 self.optimizer.step()
+
+
+class TabularPipeline(TabularClassificationPipeline):
+    def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
+                            ) -> List[Tuple[str, autoPyTorchChoice]]:
+        """
+        Defines what steps a pipeline should follow.
+        The step itself has choices given via autoPyTorchChoice.
+
+        Returns:
+            List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised
+                by the pipeline.
+        """
+        steps = []  # type: List[Tuple[str, autoPyTorchChoice]]
+
+        default_dataset_properties = {'target_type': 'tabular_classification'}
+        if dataset_properties is not None:
+            default_dataset_properties.update(dataset_properties)
+
+        steps.extend([
+            ("imputer", SimpleImputer()),
+            ("encoder", EncoderChoice(default_dataset_properties)),
+            ("scaler", ScalerChoice(default_dataset_properties)),
+            ("tabular_transformer", TabularColumnTransformer()),
+        ])
+        return steps
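The switch above from a loss instance to the class object is what allows weights to be injected at instantiation time; a minimal sketch (the weight values are illustrative, not from the diff):

    import torch

    criterion_cls = torch.nn.CrossEntropyLoss                   # kept as a class in setUp
    criterion = criterion_cls(weight=torch.tensor([1.0, 3.0]))  # instantiated later, optionally weighted
    logits = torch.randn(4, 2)
    targets = torch.randint(0, 2, (4,))
    loss = criterion(logits, targets)
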
diff --git a/test/test_pipeline/components/test_feature_preprocessor.py b/test/test_pipeline/components/test_feature_preprocessor.py
new file mode 100644
index 000000000..a812929e9
--- /dev/null
+++ b/test/test_pipeline/components/test_feature_preprocessor.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+import pytest
+
+from sklearn.base import BaseEstimator
+from sklearn.compose import make_column_transformer
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    NoFeaturePreprocessor import NoFeaturePreprocessor
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    base_feature_preprocessor_choice import FeatureProprocessorChoice
+from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
+
+
+@pytest.fixture(params=['TruncatedSVD', 'PolynomialFeatures', 'PowerTransformer',
+                        'Nystroem', 'KernelPCA', 'RandomKitchenSinks'])
+def preprocessor(request):
+    return request.param
+
+
+@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only',
+                                            'fit_dictionary_num_and_categorical'], indirect=True)
+class TestFeaturePreprocessors:
+
+    def test_feature_preprocessor(self, fit_dictionary, preprocessor):
+        preprocessor = FeatureProprocessorChoice(
+            dataset_properties=fit_dictionary['dataset_properties']
+        ).get_components()[preprocessor]()
+        configuration = preprocessor.\
+            get_hyperparameter_search_space(dataset_properties=fit_dictionary["dataset_properties"]) \
+            .get_default_configuration().get_dictionary()
+        preprocessor = preprocessor.set_params(**configuration)
+        preprocessor.fit(fit_dictionary)
+        X = preprocessor.transform(fit_dictionary)
+        sklearn_preprocessor = X['feature_preprocessor']['numerical']
+
+        # check if the fit dictionary X is modified as expected
+        assert isinstance(X['feature_preprocessor'], dict)
+        if isinstance(preprocessor, NoFeaturePreprocessor):
+            assert sklearn_preprocessor is None, sklearn_preprocessor
+            pytest.skip("Tests not relevant for {}".format(preprocessor.__class__.__name__))
+        assert isinstance(sklearn_preprocessor, BaseEstimator)
+        assert (X['feature_preprocessor']['categorical']) is None
+
+        # make column transformer with returned encoder to fit on data
+        column_transformer = make_column_transformer((sklearn_preprocessor,
+                                                      X['dataset_properties']['numerical_columns']),
+                                                     remainder='passthrough')
+        column_transformer.fit(X['X_train'])
+
+        transformed = column_transformer.transform(X['X_train'])
+        assert isinstance(transformed, np.ndarray)
+
+    def test_pipeline_fit_include(self, fit_dictionary, preprocessor):
+        """
+        This test ensures that a tabular classification
+        pipeline can be fit with all preprocessors
+        in the include
+        """
+
+        fit_dictionary['epochs'] = 1
+
+        pipeline = TabularClassificationPipeline(
+            dataset_properties=fit_dictionary['dataset_properties'],
+            include={'feature_preprocessor': [preprocessor]})
+        cs = pipeline.get_hyperparameter_search_space()
+        config = cs.sample_configuration()
+        pipeline.set_hyperparameters(config)
+        pipeline.fit(fit_dictionary)
+
+        # To make sure we fitted the model, there should be a
+        # run summary object with accuracy
+        run_summary = pipeline.named_steps['trainer'].run_summary
+        assert run_summary is not None
+
+        assert preprocessor == pipeline.named_steps['feature_preprocessor'].choice.__class__.__name__
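TestFeaturePreprocessors above leans on pytest's indirect parametrization: each string in the parametrize list is routed through the fixture of the same name. A self-contained sketch of the mechanism (toy fixture, not from the PR):

    import pytest

    @pytest.fixture
    def fit_dictionary(request):
        # request.param carries the string from the parametrize list
        return {'variant': request.param}

    @pytest.mark.parametrize("fit_dictionary", ['numerical_only'], indirect=True)
    def test_variant(fit_dictionary):
        assert fit_dictionary['variant'] == 'numerical_only'
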
diff --git a/test/test_pipeline/components/test_feature_preprocessor_choice.py b/test/test_pipeline/components/test_feature_preprocessor_choice.py
new file mode 100644
index 000000000..52d55c6df
--- /dev/null
+++ b/test/test_pipeline/components/test_feature_preprocessor_choice.py
@@ -0,0 +1,52 @@
+import copy
+import unittest
+
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
+    base_feature_preprocessor_choice import FeatureProprocessorChoice
+
+
+class TestFeaturePreprocessorChoice(unittest.TestCase):
+    def test_get_set_config_space(self):
+        """Make sure that we can setup a valid choice in the feature preprocessor
+        choice"""
+        dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]}
+        feature_preprocessor_choice = FeatureProprocessorChoice(dataset_properties)
+        cs = feature_preprocessor_choice.get_hyperparameter_search_space()
+
+        # Make sure that all hyperparameters are part of the search space
+        self.assertListEqual(
+            sorted(cs.get_hyperparameter('__choice__').choices),
+            sorted(list(feature_preprocessor_choice.get_components().keys()))
+        )
+
+        # Make sure we can properly set some random configs
+        # Whereas just one iteration will make sure the algorithm works,
+        # doing five iterations increases the confidence. We will be able to
+        # catch component specific crashes
+        for i in range(5):
+            config = cs.sample_configuration()
+            config_dict = copy.deepcopy(config.get_dictionary())
+            feature_preprocessor_choice.set_hyperparameters(config)
+
+            self.assertEqual(feature_preprocessor_choice.choice.__class__,
+                             feature_preprocessor_choice.get_components()[config_dict['__choice__']])
+
+            # Then check the choice configuration
+            selected_choice = config_dict.pop('__choice__', None)
+            for key, value in config_dict.items():
+                # Remove the selected_choice string from the parameter
+                # so we can query in the object for it
+                key = key.replace(selected_choice + ':', '')
+                self.assertIn(key, vars(feature_preprocessor_choice.choice))
+                self.assertEqual(value, feature_preprocessor_choice.choice.__dict__[key])
+
+    def test_only_categorical(self):
+        dataset_properties = {'numerical_columns': [], 'categorical_columns': list(range(4))}
+
+        chooser = FeatureProprocessorChoice(dataset_properties)
+        configspace = chooser.get_hyperparameter_search_space().sample_configuration().get_dictionary()
+        self.assertEqual(configspace['__choice__'], 'NoFeaturePreprocessor')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_pipeline/components/test_setup_networks.py b/test/test_pipeline/components/test_setup_networks.py
index 2e3c07ccc..46debb0c5 100644
--- a/test/test_pipeline/components/test_setup_networks.py
+++ b/test/test_pipeline/components/test_setup_networks.py
@@ -1,3 +1,5 @@
+import flaky
+
 import pytest

 import torch
@@ -15,6 +17,7 @@ def head(request):
     return request.param


+@flaky.flaky(max_runs=3)
 @pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only',
                                             'fit_dictionary_categorical_only',
                                             'fit_dictionary_num_and_categorical'], indirect=True)
diff --git a/test/test_pipeline/components/test_tabular_column_transformer.py b/test/test_pipeline/components/test_tabular_column_transformer.py
index 08d891a14..5eae26f69 100644
--- a/test/test_pipeline/components/test_tabular_column_transformer.py
+++ b/test/test_pipeline/components/test_tabular_column_transformer.py
@@ -1,115 +1,52 @@
-import unittest
-from typing import Any, Dict, List, Optional, Tuple
+from test.test_pipeline.components.base import TabularPipeline

 import numpy as np

+import pytest
+
 from scipy.sparse import csr_matrix

 from sklearn.compose import ColumnTransformer

-from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import (
     TabularColumnTransformer
 )
-from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import (
-    EncoderChoice
-)
-from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer
-from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice
-from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
-
-
-class TabularPipeline(TabularClassificationPipeline):
-    def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
-                            ) -> List[Tuple[str, autoPyTorchChoice]]:
-        """
-        Defines what steps a pipeline should follow.
-        The step itself has choices given via autoPyTorchChoice.
-
-        Returns:
-            List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised
-                by the pipeline.
-        """
-        steps = []  # type: List[Tuple[str, autoPyTorchChoice]]
-
-        default_dataset_properties = {'target_type': 'tabular_classification'}
-        if dataset_properties is not None:
-            default_dataset_properties.update(dataset_properties)
-        steps.extend([
-            ("imputer", SimpleImputer()),
-            ("encoder", EncoderChoice(default_dataset_properties)),
-            ("scaler", ScalerChoice(default_dataset_properties)),
-            ("tabular_transformer", TabularColumnTransformer()),
-        ])
-        return steps

+@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only',
+                                            'fit_dictionary_categorical_only',
+                                            'fit_dictionary_num_and_categorical'], indirect=True)
+class TestTabularTransformer:
+    def test_tabular_preprocess(self, fit_dictionary):

-class TabularTransformerTest(unittest.TestCase):
-
-    def test_tabular_preprocess_only_numerical(self):
-        dataset_properties = dict(numerical_columns=list(range(15)),
-                                  categorical_columns=[],
-                                  categories=[],
-                                  num_features=15,
-                                  num_classes=2,
-                                  issparse=False)
-        X = dict(X_train=np.random.random((10, 15)),
-                 is_small_preprocess=True,
-                 dataset_properties=dataset_properties
-                 )
-
-        pipeline = TabularPipeline(dataset_properties=dataset_properties)
-        pipeline = pipeline.fit(X)
-        X = pipeline.transform(X)
+        pipeline = TabularPipeline(dataset_properties=fit_dictionary['dataset_properties'])
+        pipeline = pipeline.fit(fit_dictionary)
+        X = pipeline.transform(fit_dictionary)
         column_transformer = X['tabular_transformer']

         # check if transformer was added to fit dictionary
-        self.assertIn('tabular_transformer', X.keys())
+        assert 'tabular_transformer' in X.keys()

         # check if transformer is of expected type
         # In this case we expect the tabular transformer not the actual column transformer
         # as the latter is not callable and runs into error in the compose transform
-        self.assertIsInstance(column_transformer, TabularColumnTransformer)
+        assert isinstance(column_transformer, TabularColumnTransformer)

         data = column_transformer.preprocessor.fit_transform(X['X_train'])
-        self.assertIsInstance(data, np.ndarray)
+        assert isinstance(data, np.ndarray)

-    def test_tabular_preprocess_only_categorical(self):
-        dataset_properties = dict(numerical_columns=[],
-                                  categorical_columns=list(range(2)),
-                                  categories=[['male', 'female'], ['germany']],
-                                  num_features=15,
-                                  num_classes=2,
-                                  issparse=False)
-        X = dict(X_train=np.array([['male', 'germany'],
-                                   ['female', 'germany'],
-                                   ['male', 'germany']], dtype=object),
-                 dataset_properties=dataset_properties
-                 )
-        pipeline = TabularPipeline(dataset_properties=dataset_properties)
-        pipeline = pipeline.fit(X)
-        X = pipeline.transform(X)
-        column_transformer = X['tabular_transformer']
-
-        # check if transformer was added to fit dictionary
-        self.assertIn('tabular_transformer', X.keys())
-        # check if transformer is of expected type
-        self.assertIsInstance(column_transformer, TabularColumnTransformer)
-
-        data = column_transformer.preprocessor.fit_transform(X['X_train'])
-        self.assertIsInstance(data, np.ndarray)
-
-    def test_sparse_data(self):
+    def test_sparse_data(self, fit_dictionary):
         X = np.random.binomial(1, 0.1, (100, 2000))
         sparse_X = csr_matrix(X)
         numerical_columns = list(range(2000))
         categorical_columns = []
         train_indices = np.array(range(50))
-        dataset_properties = dict(numerical_columns=numerical_columns, categorical_columns=categorical_columns,
+        dataset_properties = dict(numerical_columns=numerical_columns,
+                                  categorical_columns=categorical_columns,
                                   categories=[],
                                   issparse=True)
         X = {
-            'X_train': sparse_X[train_indices],
+            'X_train': sparse_X,
+            'train_indices': train_indices,
             'dataset_properties': dataset_properties
         }

@@ -120,13 +57,9 @@ def test_sparse_data(self):
         column_transformer = X['tabular_transformer']

         # check if transformer was added to fit dictionary
-        self.assertIn('tabular_transformer', X.keys())
+        assert 'tabular_transformer' in X.keys()

         # check if transformer is of expected type
-        self.assertIsInstance(column_transformer.preprocessor, ColumnTransformer)
+        assert isinstance(column_transformer.preprocessor, ColumnTransformer)

         data = column_transformer.preprocessor.fit_transform(X['X_train'])
-        self.assertIsInstance(data, csr_matrix)
-
-
-if __name__ == '__main__':
-    unittest.main()
+        assert isinstance(data, csr_matrix)
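Note for the sparse test above: the full csr_matrix is now handed over together with train_indices instead of a pre-sliced matrix. For reference, csr matrices support positional row selection, which is what such slicing relies on (standalone sketch, not from the diff):

    import numpy as np
    from scipy.sparse import csr_matrix

    sparse_X = csr_matrix(np.random.binomial(1, 0.1, (100, 20)))
    train_indices = np.arange(50)
    assert sparse_X[train_indices].shape == (50, 20)  # rows selected by position
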
diff --git a/test/test_pipeline/components/test_training.py b/test/test_pipeline/components/test_training.py
index ec745d613..4e8e9b0ca 100644
--- a/test/test_pipeline/components/test_training.py
+++ b/test/test_pipeline/components/test_training.py
@@ -139,7 +139,9 @@ def test_evaluate(self):
             device=self.device,
             metrics_during_training=True,
             scheduler=None,
-            task_type=self.task_type
+            task_type=self.task_type,
+            output_type=self.output_type,
+            labels=self.y
         )

         prev_loss, prev_metrics = trainer.evaluate(self.loader, epoch=1, writer=None)
@@ -173,7 +175,9 @@ def test_epoch_training(self):
             optimizer=self.optimizer,
             device=self.device,
             metrics_during_training=True,
-            task_type=self.task_type
+            task_type=self.task_type,
+            output_type=self.output_type,
+            labels=self.y
         )

         # Train the model
@@ -205,7 +209,9 @@ def test_epoch_training(self):
             optimizer=self.optimizer,
             device=self.device,
             metrics_during_training=True,
-            task_type=self.task_type
+            task_type=self.task_type,
+            output_type=self.output_type,
+            labels=self.y
         )

         # Train the model
@@ -264,7 +270,7 @@ def test_every_trainer_is_valid(self):

     def test_get_set_config_space(self):
         """Make sure that we can setup a valid choice in the trainer choice"""
-        trainer_choice = TrainerChoice(dataset_properties={})
+        trainer_choice = TrainerChoice(dataset_properties={'task_type': 'tabular_classification'})
         cs = trainer_choice.get_hyperparameter_search_space()

         # Make sure that all hyperparameters are part of the search space
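The test_losses.py changes below assume that get_loss_instance now returns the loss class rather than an instance, leaving it to the caller to decide whether to pass weights; a minimal sketch of that calling convention (weights are illustrative):

    import torch

    from autoPyTorch.pipeline.components.training.losses import get_loss_instance

    dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multiclass'}
    loss_cls = get_loss_instance(dataset_properties)          # the class, e.g. torch.nn.CrossEntropyLoss
    criterion = loss_cls(weight=torch.tensor([0.25, 0.75]))   # instantiate, optionally with class weights
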
diff --git a/test/test_pipeline/test_losses.py b/test/test_pipeline/test_losses.py
index 7cb744a29..ca3438d58 100644
--- a/test/test_pipeline/test_losses.py
+++ b/test/test_pipeline/test_losses.py
@@ -3,7 +3,9 @@
 import torch
 from torch import nn

+from autoPyTorch.constants import STRING_TO_OUTPUT_TYPES
 from autoPyTorch.pipeline.components.training.losses import get_loss_instance
+from autoPyTorch.utils.implementations import get_loss_weight_strategy


 @pytest.mark.parametrize('output_type', ['multiclass',
@@ -13,7 +15,7 @@
 def test_get_no_name(output_type):
     dataset_properties = {'task_type': 'tabular_classification', 'output_type': output_type}
     loss = get_loss_instance(dataset_properties)
-    assert isinstance(loss, nn.Module)
+    assert isinstance(loss(), nn.Module)


 @pytest.mark.parametrize('output_type_name', [('multiclass', 'CrossEntropyLoss'),
@@ -21,7 +23,7 @@
 def test_get_name(output_type_name):
     output_type, name = output_type_name
     dataset_properties = {'task_type': 'tabular_classification', 'output_type': output_type}
-    loss = get_loss_instance(dataset_properties, name)
+    loss = get_loss_instance(dataset_properties, name)()
     assert isinstance(loss, nn.Module)
     assert str(loss) == f"{name}()"

@@ -33,7 +35,8 @@ def test_get_name_error():
         get_loss_instance(dataset_properties, name)


-def test_losses():
+@pytest.mark.parametrize('weighted', [True, False])
+def test_losses(weighted):
     list_properties = [{'task_type': 'tabular_classification', 'output_type': 'multiclass'},
                        {'task_type': 'tabular_classification', 'output_type': 'binary'},
                        {'task_type': 'tabular_regression', 'output_type': 'continuous'}]
@@ -41,7 +44,17 @@
     list_predictions = [pred_cross_entropy, torch.empty(4).random_(2), torch.randn(4)]
     list_names = [None, 'BCEWithLogitsLoss', None]
     list_targets = [torch.empty(4, dtype=torch.long).random_(4), torch.empty(4).random_(2), torch.randn(4)]
-    for dataset_properties, pred, target, name in zip(list_properties, list_predictions, list_targets, list_names):
+    labels = [torch.empty(20, dtype=torch.long).random_(4), torch.empty(12, dtype=torch.long).random_(2), None]
+    for dataset_properties, pred, target, name, label in zip(list_properties, list_predictions,
+                                                             list_targets, list_names, labels):
         loss = get_loss_instance(dataset_properties=dataset_properties, name=name)
+        weights = None
+        if bool(weighted) and 'classification' in dataset_properties['task_type']:
+            strategy = get_loss_weight_strategy(output_type=STRING_TO_OUTPUT_TYPES[dataset_properties['output_type']])
+            weights = strategy(y=label)
+            weights = torch.from_numpy(weights)
+            weights = weights.type(torch.FloatTensor)
+            kwargs = {'pos_weight': weights} if 'binary' in dataset_properties['output_type'] else {'weight': weights}
+        loss = loss() if weights is None else loss(**kwargs)
         score = loss(pred, target)
         assert isinstance(score, torch.Tensor)
diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py
index d5cc9acee..e7ae68012 100644
--- a/test/test_pipeline/test_tabular_classification.py
+++ b/test/test_pipeline/test_tabular_classification.py
@@ -146,11 +146,9 @@ def test_default_configuration(self, fit_dictionary, is_small_preprocess):
         """Makes sure that when no config is set, we can trust the
        default configuration from the space"""

-        fit_dictionary['is_small_preprocess'] = is_small_preprocess
-
+        fit_dictionary['dataset_properties']['is_small_preprocess'] = is_small_preprocess
         pipeline = TabularClassificationPipeline(
             dataset_properties=fit_dictionary['dataset_properties'])
-
         pipeline.fit(fit_dictionary)

     def test_remove_key_check_requirements(self, fit_dictionary):