diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py
index 11c6cf577..4f64b429a 100644
--- a/autoPyTorch/data/base_feature_validator.py
+++ b/autoPyTorch/data/base_feature_validator.py
@@ -1,20 +1,14 @@
 import logging
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union

 import numpy as np

-import pandas as pd
-
-from scipy.sparse import spmatrix
-
 from sklearn.base import BaseEstimator

+from autoPyTorch.data.utils import SupportedFeatTypes, list_to_pandas
 from autoPyTorch.utils.logging_ import PicklableClientLogger


-SupportedFeatTypes = Union[List, pd.DataFrame, np.ndarray, spmatrix]
-
-
 class BaseFeatureValidator(BaseEstimator):
     """
     A class to pre-process features. In this regards, the format of the data is checked,
@@ -27,8 +21,8 @@ class BaseFeatureValidator(BaseEstimator):
         column_transformer (Optional[BaseEstimator])
             Host a encoder object if the data requires transformation (for example,
             if provided a categorical column in a pandas DataFrame)
-        transformed_columns (List[str])
-            List of columns that were encoded.
+        enc_columns (List[str]):
+            The list of column names that should be encoded.
     """
     def __init__(
         self,
@@ -37,11 +31,11 @@ def __init__(
         # Register types to detect unsupported data format changes
         self.feat_type: Optional[List[str]] = None
         self.data_type: Optional[type] = None
-        self.dtypes: List[str] = []
+        self.dtypes: Dict[str, str] = {}
         self.column_order: List[str] = []

         self.column_transformer: Optional[BaseEstimator] = None
-        self.transformed_columns: List[str] = []
+        self.enc_columns: List[str] = []

         self.logger: Union[
             PicklableClientLogger, logging.Logger
@@ -75,7 +69,8 @@ def fit(

         # If a list was provided, it will be converted to pandas
         if isinstance(X_train, list):
-            X_train, X_test = self.list_to_dataframe(X_train, X_test)
+            X_train = list_to_pandas(X_train, self.logger)
+            X_test = list_to_pandas(X_test, self.logger) if X_test is not None else None

         self._check_data(X_train)
diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py
index 530675fbd..f5099ff62 100644
--- a/autoPyTorch/data/base_target_validator.py
+++ b/autoPyTorch/data/base_target_validator.py
@@ -1,18 +1,14 @@
 import logging
-from typing import List, Optional, Union, cast
+from typing import Optional, Union, cast

 import numpy as np

 import pandas as pd

-from scipy.sparse import spmatrix
-
 from sklearn.base import BaseEstimator

+from autoPyTorch.data.utils import SupportedTargetTypes
 from autoPyTorch.utils.logging_ import PicklableClientLogger
-
-
-SupportedTargetTypes = Union[List, pd.Series, pd.DataFrame, np.ndarray, spmatrix]


 class BaseTargetValidator(BaseEstimator):
diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
index 3e8c316b0..cecd90257 100644
--- a/autoPyTorch/data/tabular_feature_validator.py
+++ b/autoPyTorch/data/tabular_feature_validator.py
@@ -1,6 +1,12 @@
+"""
+TODO:
+    1. Add dtypes argument to TabularFeatureValidator
+    2. Modify dtypes from List[str] to Dict[str, str]
+    3. 
Enforce the provided dtypes on the incoming data
+"""
 import functools
 from logging import Logger
-from typing import Any, Dict, List, Mapping, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union, cast

 import numpy as np

@@ -9,7 +15,6 @@

 from scipy.sparse import issparse, spmatrix

-import sklearn.utils
 from sklearn import preprocessing
 from sklearn.base import BaseEstimator
 from sklearn.compose import ColumnTransformer
@@ -19,9 +24,15 @@

 from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SupportedFeatTypes
 from autoPyTorch.data.utils import (
+    ColumnDTypes,
     DatasetCompressionInputType,
     DatasetDTypeContainerType,
-    reduce_dataset_size_if_too_large
+    _categorical_left_mover,
+    _check_and_to_array,
+    _get_columns_to_encode,
+    has_object_columns,
+    reduce_dataset_size_if_too_large,
+    to_pandas,
 )
 from autoPyTorch.utils.common import ispandas
 from autoPyTorch.utils.logging_ import PicklableClientLogger
@@ -86,7 +97,7 @@ class TabularFeatureValidator(BaseFeatureValidator):
             List for which an element at each index is a list containing the categories
             for the respective categorical column.
-        transformed_columns (List[str])
+        enc_columns (List[str])
             List of columns that were transformed.
         column_transformer (Optional[BaseEstimator])
             Hosts an imputer and an encoder object if the data
@@ -104,38 +115,79 @@ def __init__(
         self,
         logger: Optional[Union[PicklableClientLogger, Logger]] = None,
         dataset_compression: Optional[Mapping[str, Any]] = None,
+        dtypes: Optional[Dict[str, str]] = None,
     ) -> None:
+        super().__init__(logger)
         self._dataset_compression = dataset_compression
         self._reduced_dtype: Optional[DatasetDTypeContainerType] = None
-        super().__init__(logger)
+        self.all_nan_columns: Optional[Set[str]] = None
+        self.dtypes = dtypes if dtypes is not None else {}
+        self._called_infer_object = False
+
+    def _convert_all_nan_columns_to_numeric(self, X: pd.DataFrame, fit: bool = False) -> pd.DataFrame:
+        """
+        Convert columns whose values were all nan in the training dataset to numeric.
+
+        Args:
+            X (pd.DataFrame):
+                The data to transform.
+            fit (bool):
+                Whether this call fits X or transforms it using the already-fitted transformer.
+        """
+        if not fit and not issparse(X) and self.all_nan_columns is None:
+            raise ValueError('_fit must be called before calling transform')
+
+        if fit:
+            all_nan_columns = X.columns[X.isna().all()]
+        else:
+            assert self.all_nan_columns is not None
+            all_nan_columns = list(self.all_nan_columns)
+
+        for col in all_nan_columns:
+            X[col] = np.nan
+            X[col] = pd.to_numeric(X[col])
+            if len(self.dtypes):
+                self.dtypes[col] = X[col].dtype.name
+
+        if has_object_columns(X.dtypes.values):
+            X = self.infer_objects(X)
+
+        if fit:
+            # TODO: Check how to integrate below
+            # self.dtypes = [dt.name for dt in X.dtypes]
+            self.all_nan_columns = set(all_nan_columns)
+
+        return X

     @staticmethod
     def _comparator(cmp1: str, cmp2: str) -> int:
-        """Order so that categorical columns come left and numerical columns come right
+        return _categorical_left_mover(cmp1, cmp2)

-        Args:
-            cmp1 (str): First variable to compare
-            cmp2 (str): Second variable to compare
+    def _encode_categories(self, X: pd.DataFrame) -> None:
+        preprocessors = get_tabular_preprocessors()
+        self.column_transformer = _create_column_transformer(
+            preprocessors=preprocessors,
+            categorical_columns=self.enc_columns,
+        )

-        Raises:
-            ValueError: if the values of the variables to compare
-            are not in 'categorical' or 'numerical'
+        assert self.column_transformer is not None  # Mypy redefinition
+        self.column_transformer.fit(X)

-        Returns:
-            int: either [0, -1, 1]
-        """
-        choices = ['categorical', 'numerical']
-        if cmp1 not in choices or cmp2 not in choices:
-            raise ValueError('The comparator for the column order only accepts {}, '
-                             'but got {} and {}'.format(choices, cmp1, cmp2))
+        # The column transformer moves categoricals to the left side
+        assert self.feat_type is not None
+        self.feat_type = sorted(self.feat_type, key=functools.cmp_to_key(self._comparator))

-        idx1, idx2 = choices.index(cmp1), choices.index(cmp2)
-        return idx1 - idx2
+        encoded_categories = self.column_transformer.\
+            named_transformers_['categorical_pipeline'].\
+            named_steps['ordinalencoder'].categories_

-    def _fit(
-        self,
-        X: SupportedFeatTypes,
-    ) -> BaseEstimator:
+        # One ordinal encoder per categorical column
+        self.categories = [
+            list(range(len(cat)))
+            for cat in encoded_categories
+        ]
+
+    def _fit(self, X: SupportedFeatTypes) -> BaseEstimator:
         """
         In case input data is a pandas DataFrame, this utility encodes the user provided
         features (from categorical for example) to a numerical value that further stages
@@ -151,78 +203,27 @@

             The fitted base estimator
         """

-        # The final output of a validator is a numpy array. But pandas
-        # gives us information about the column dtype
-        if isinstance(X, np.ndarray):
-            X = self.numpy_array_to_pandas(X)
+        X = to_pandas(X)  # convert to pandas, which carries the column dtype information

         if ispandas(X) and not issparse(X):
             X = cast(pd.DataFrame, X)

-            # Treat a column with all instances a NaN as numerical
-            # This will prevent doing encoding to a categorical column made completely
-            # out of nan values -- which will trigger a fail, as encoding is not supported
-            # with nan values. 
-            # Columns that are completely made of NaN values are provided to the pipeline
-            # so that later stages decide how to handle them
-            if np.any(pd.isnull(X)):
-                for column in X.columns:
-                    if X[column].isna().all():
-                        X[column] = pd.to_numeric(X[column])
-                        # Also note this change in self.dtypes
-                        if len(self.dtypes) != 0:
-                            self.dtypes[list(X.columns).index(column)] = X[column].dtype
-
-            if not X.select_dtypes(include='object').empty:
-                X = self.infer_objects(X)
-
-        self.transformed_columns, self.feat_type = self._get_columns_to_encode(X)
+            X = self._convert_all_nan_columns_to_numeric(X, fit=True)

+        self.enc_columns, self.feat_type = self._get_columns_to_encode(X)
         assert self.feat_type is not None
+        if len(self.enc_columns) > 0:
+            self._encode_categories(X)

-        if len(self.transformed_columns) > 0:
-
-            preprocessors = get_tabular_preprocessors()
-            self.column_transformer = _create_column_transformer(
-                preprocessors=preprocessors,
-                categorical_columns=self.transformed_columns,
-            )
-
-            # Mypy redefinition
-            assert self.column_transformer is not None
-            self.column_transformer.fit(X)
-
-            # The column transformer reorders the feature types
-            # therefore, we need to change the order of columns as well
-            # This means categorical columns are shifted to the left
-            self.feat_type = sorted(
-                self.feat_type,
-                key=functools.cmp_to_key(self._comparator)
-            )
-
-            encoded_categories = self.column_transformer.\
-                named_transformers_['categorical_pipeline'].\
-                named_steps['ordinalencoder'].categories_
-            self.categories = [
-                # We fit an ordinal encoder, where all categorical
-                # columns are shifted to the left
-                list(range(len(cat)))
-                for cat in encoded_categories
-            ]
-
-        for i, type_ in enumerate(self.feat_type):
-            if 'numerical' in type_:
+        for i, type_name in enumerate(self.feat_type):
+            if str(ColumnDTypes.numerical) in type_name:
                 self.numerical_columns.append(i)
             else:
                 self.categorical_columns.append(i)

-        # Lastly, store the number of features
         self.num_features = np.shape(X)[1]

         return self

-    def transform(
-        self,
-        X: SupportedFeatTypes,
-    ) -> Union[np.ndarray, spmatrix, pd.DataFrame]:
+    def transform(self, X: SupportedFeatTypes) -> Union[np.ndarray, spmatrix, pd.DataFrame]:
         """
         Validates and fit a categorical encoder (if needed) to the features.
         The supported data types are List, numpy arrays and pandas DataFrames.
@@ -235,40 +236,54 @@

         Return:
             np.ndarray:
                 The transformed array
+
+        Note:
+            The default transform performs the following:
+                * simple imputation for both numerical and categorical
+                * scaling for numerical
+                * one-hot encoding for categorical
+            For example, consider a simple case
+            in which all the columns are categorical:
+                data = [
+                    {'A': 1, 'B': np.nan, 'C': np.nan},
+                    {'A': np.nan, 'B': 3, 'C': np.nan},
+                    {'A': 2, 'B': np.nan, 'C': np.nan}
+                ]
+            The value set of each column is
+            then
+                * `A` in {np.nan, 1, 2}
+                * `B` in {np.nan, 3}
+                * `C` in {np.nan} <=== this column will be dropped. 
+ + So in the column A, + * np.nan ==> [1, 0, 0] (always the index 0) + * 1 ==> [0, 1, 0] + * 2 ==> [0, 0, 1] + in the column B, + * np.nan ==> [1, 0] + * 3 ==> [0, 1] + Therefore, by concatenating, + * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0] + * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1] + * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0] + ==> [ + [0, 1, 0, 1, 0], + [1, 0, 0, 0, 1], + [0, 0, 1, 1, 0] + ] """ if not self._is_fitted: raise NotFittedError("Cannot call transform on a validator that is not fitted") - # If a list was provided, it will be converted to pandas - if isinstance(X, list): - X, _ = self.list_to_dataframe(X) - - if isinstance(X, np.ndarray): - X = self.numpy_array_to_pandas(X) - + X = to_pandas(X) if ispandas(X) and not issparse(X): - if np.any(pd.isnull(X)): - for column in X.columns: - if X[column].isna().all(): - X[column] = pd.to_numeric(X[column]) - - # Also remove the object dtype for new data - if not X.select_dtypes(include='object').empty: - X = self.infer_objects(X) + X = self._convert_all_nan_columns_to_numeric(X) # Check the data here so we catch problems on new test data self._check_data(X) # Pandas related transformations if ispandas(X) and self.column_transformer is not None: - if np.any(pd.isnull(X)): - # After above check it means that if there is a NaN - # the whole column must be NaN - # Make sure it is numerical and let the pipeline handle it - for column in X.columns: - if X[column].isna().all(): - X[column] = pd.to_numeric(X[column]) - X = self.column_transformer.transform(X) # Sparse related transformations @@ -276,20 +291,7 @@ def transform( if issparse(X) and hasattr(X, 'sort_indices'): X.sort_indices() - try: - X = sklearn.utils.check_array( - X, - force_all_finite=False, - accept_sparse='csr' - ) - except Exception as e: - self.logger.exception(f"Conversion failed for input {X.dtypes} {X}" - "This means AutoPyTorch was not able to properly " - "Extract the dtypes of the provided input features. " - "Please try to manually cast it to a supported " - "numerical or categorical values.") - raise e - + X = _check_and_to_array(X, logger=self.logger) X = self._compress_dataset(X) return X @@ -301,7 +303,6 @@ def _compress_dataset(self, X: DatasetCompressionInputType) -> DatasetCompressio the testing data is converted to the same dtype as the training data. 
-

         Args:
             X (DatasetCompressionInputType):
                 Dataset

@@ -322,10 +323,26 @@ def _compress_dataset(self, X: DatasetCompressionInputType) -> DatasetCompressio
             self._reduced_dtype = dict(X.dtypes) if is_dataframe else X.dtype

         return X

-    def _check_data(
-        self,
-        X: SupportedFeatTypes,
-    ) -> None:
+    def _check_dataframe(self, X: pd.DataFrame) -> None:
+        err_msg = " of the features must be identical before/after fit(), "
+        err_msg += "but they differ between the training and test datasets:\n"
+
+        # Define the columns to be encoded here, as the feature validator is fitted only once per estimator
+        self.enc_columns, self.feat_type = self._get_columns_to_encode(X)
+
+        column_order = list(X.columns)
+        if len(self.column_order) == 0:
+            self.column_order = column_order
+        elif self.column_order != column_order:
+            raise ValueError(f"The column order{err_msg}train: {self.column_order}\ntest: {column_order}")
+
+        dtypes = {col: dtype.name for col, dtype in zip(X.columns, X.dtypes)}
+        if len(self.dtypes) == 0:
+            self.dtypes = dtypes
+        elif self.dtypes != dtypes:
+            raise ValueError(f"The dtypes{err_msg}train: {self.dtypes}\ntest: {dtypes}")
+
+    def _check_data(self, X: SupportedFeatTypes) -> None:
         """
         Feature dimensionality and data type checks

@@ -336,73 +353,31 @@

         """
         if not isinstance(X, (np.ndarray, pd.DataFrame)) and not issparse(X):
-            raise ValueError("AutoPyTorch only supports Numpy arrays, Pandas DataFrames,"
-                             " scipy sparse and Python Lists, yet, the provided input is"
-                             " of type {}".format(type(X))
-                             )
+            raise TypeError(
+                "AutoPyTorch only supports numpy.ndarray, pandas.DataFrame,"
+                f" scipy.sparse matrices and Python lists, but got {type(X)}"
+            )

         if self.data_type is None:
             self.data_type = type(X)

         if self.data_type != type(X):
-            self.logger.warning("AutoPyTorch previously received features of type %s "
-                                "yet the current features have type %s. Changing the dtype "
-                                "of inputs to an estimator might cause problems" % (
-                                    str(self.data_type),
-                                    str(type(X)),
-                                ),
-                                )
-
-        # Do not support category/string numpy data. Only numbers
-        if hasattr(X, "dtype"):
-            if not np.issubdtype(X.dtype.type, np.number):  # type: ignore[union-attr]
-                raise ValueError(
-                    "When providing a numpy array to AutoPyTorch, the only valid "
-                    "dtypes are numerical ones. The provided data type {} is not supported."
-                    "".format(
-                        X.dtype.type,  # type: ignore[union-attr]
-                    )
-                )
+            self.logger.warning(
+                f"AutoPyTorch previously received features of type {str(self.data_type)}, "
+                f"but got type {str(type(X))} in the current features. This change might cause problems"
+            )

-        # Then for Pandas, we do not support Nan in categorical columns
-        if ispandas(X):
-            # If entered here, we have a pandas dataframe
+        if ispandas(X):  # for pandas, NaN is not supported in categorical columns
             X = cast(pd.DataFrame, X)
+            self._check_dataframe(X)

-            # Handle objects if possible
-            if not X.select_dtypes(include='object').empty:
-                X = self.infer_objects(X)
-
-            # Define the column to be encoded here as the feature validator is fitted once
-            # per estimator
-            self.transformed_columns, self.feat_type = self._get_columns_to_encode(X)
-
-            column_order = [column for column in X.columns]
-            if len(self.column_order) > 0:
-                if self.column_order != column_order:
-                    raise ValueError("Changing the column order of the features after fit() is "
-                                     "not supported. 
Fit() method was called with "
-                                     "{} whereas the new features have {} as type".format(self.column_order,
-                                                                                          column_order,)
-                                     )
-            else:
-                self.column_order = column_order
-
-            dtypes = [dtype.name for dtype in X.dtypes]
-            if len(self.dtypes) > 0:
-                if self.dtypes != dtypes:
-                    raise ValueError("Changing the dtype of the features after fit() is "
-                                     "not supported. Fit() method was called with "
-                                     "{} whereas the new features have {} as type".format(self.dtypes,
-                                                                                          dtypes,
-                                                                                          )
-                                     )
-            else:
-                self.dtypes = dtypes
-
-    def _get_columns_to_encode(
-        self,
-        X: pd.DataFrame,
-    ) -> Tuple[List[str], List[str]]:
+        # For ndarray, category/string dtypes are not supported
+        if isinstance(X, np.ndarray) and not np.issubdtype(X.dtype.type, np.number):
+            dt = X.dtype.type
+            raise TypeError(
+                f"AutoPyTorch does not support numpy.ndarray with non-numerical dtype, but got {dt}"
+            )
+
+    def _get_columns_to_encode(self, X: pd.DataFrame) -> Tuple[List[str], List[str]]:
         """
         Return the columns to be encoded from a pandas dataframe

@@ -412,121 +387,16 @@

             checks) and an encoder fitted in the case the data needs encoding

         Returns:
-            transformed_columns (List[str]):
-                Columns to encode, if any
-            feat_type:
-                Type of each column numerical/categorical
-        """
-
-        if len(self.transformed_columns) > 0 and self.feat_type is not None:
-            return self.transformed_columns, self.feat_type
-
-        # Register if a column needs encoding
-        transformed_columns = []
-
-        # Also, register the feature types for the estimator
-        feat_type = []
-
-        # Make sure each column is a valid type
-        for i, column in enumerate(X.columns):
-            if X[column].dtype.name in ['category', 'bool']:
-
-                transformed_columns.append(column)
-                feat_type.append('categorical')
-            # Move away from np.issubdtype as it causes
-            # TypeError: data type not understood in certain pandas types
-            elif not is_numeric_dtype(X[column]):
-                if X[column].dtype.name == 'object':
-                    raise ValueError(
-                        "Input Column {} has invalid type object. "
-                        "Cast it to a valid dtype before using it in AutoPyTorch. "
-                        "Valid types are numerical, categorical or boolean. "
-                        "You can cast it to a valid dtype using "
-                        "pandas.Series.astype ."
-                        "If working with string objects, the following "
-                        "tutorial illustrates how to work with text data: "
-                        "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format(
-                            # noqa: E501
-                            column,
-                        )
-                    )
-                elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(
-                    X[column].dtype
-                ):
-                    raise ValueError(
-                        "AutoPyTorch does not support time and/or date datatype as given "
-                        "in column {}. Please convert the time information to a numerical value "
-                        "first. One example on how to do this can be found on "
-                        "https://stats.stackexchange.com/questions/311494/".format(
-                            column,
-                        )
-                    )
-                else:
-                    raise ValueError(
-                        "Input Column {} has unsupported dtype {}. "
-                        "Supported column types are categorical/bool/numerical dtypes. "
-                        "Make sure your data is formatted in a correct way, "
-                        "before feeding it to AutoPyTorch.".format(
-                            column,
-                            X[column].dtype.name,
-                        )
-                    )
-            else:
-                feat_type.append('numerical')
-        return transformed_columns, feat_type
-
-    def list_to_dataframe(
-        self,
-        X_train: SupportedFeatTypes,
-        X_test: Optional[SupportedFeatTypes] = None,
-    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
-        """
-        Converts a list to a pandas DataFrame. In this process, column types are inferred. 
-
-        If test data is provided, we proactively match it to train data
-
-        Args:
-            X_train (SupportedFeatTypes):
-                A set of features that are going to be validated (type and dimensionality
-                checks) and a encoder fitted in the case the data needs encoding
-            X_test (Optional[SupportedFeatTypes]):
-                A hold out set of data used for checking
-
-        Returns:
-            pd.DataFrame:
-                transformed train data from list to pandas DataFrame
-            pd.DataFrame:
-                transformed test data from list to pandas DataFrame
-        """
-
-        # If a list was provided, it will be converted to pandas
-        X_train = pd.DataFrame(data=X_train).infer_objects()
-        self.logger.warning("The provided feature types to AutoPyTorch are of type list."
-                            "Features have been interpreted as: {}".format([(col, t) for col, t in
                                                                             zip(X_train.columns, X_train.dtypes)]))
-        if X_test is not None:
-            if not isinstance(X_test, list):
-                self.logger.warning("Train features are a list while the provided test data"
-                                    "is {}. X_test will be casted as DataFrame.".format(type(X_test))
-                                    )
-            X_test = pd.DataFrame(data=X_test).infer_objects()
-        return X_train, X_test
-
-    def numpy_array_to_pandas(
-        self,
-        X: np.ndarray,
-    ) -> pd.DataFrame:
-        """
-        Converts a numpy array to pandas for type inference
-
-        Args:
-            X (np.ndarray):
-                data to be interpreted.
-
-        Returns:
-            pd.DataFrame
-        """
-        return pd.DataFrame(X).infer_objects().convert_dtypes()
+            enc_columns (List[str]):
+                Columns to encode
+            feat_type (List[str]):
+                Whether each column is numerical or categorical
         """
+        if len(self.enc_columns) > 0 and self.feat_type is not None:
+            return self.enc_columns, self.feat_type
+        else:
+            return _get_columns_to_encode(X)

     def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
         """
@@ -541,26 +411,25 @@

         Returns:
             pd.DataFrame
         """
-        if hasattr(self, 'object_dtype_mapping'):
-            # Mypy does not process the has attr. This dict is defined below
-            for key, dtype in self.object_dtype_mapping.items():  # type: ignore[has-type]
-                if 'int' in dtype.name:
-                    # In the case train data was interpreted as int
-                    # and test data was interpreted as float, because of 0.0
-                    # for example, honor training data
-                    X[key] = X[key].applymap(np.int64)
-                else:
-                    try:
-                        X[key] = X[key].astype(dtype.name)
-                    except Exception as e:
-                        # Try inference if possible
-                        self.logger.warning(f"Tried to cast column {key} to {dtype} caused {e}")
-                        pass
-        else:
+        if self._called_infer_object:
+            # Honor the training data types
+            try:
+                # Mypy cannot track the attribute type here. 
+ X = X.astype(self.dtypes) # type: ignore[has-type] + except Exception as e: + self.logger.warning( + 'Casting the columns to training dtypes ' + f'{self.dtypes} caused the exception {e}' # type: ignore[has-type] + ) + elif len(self.dtypes): # Overwrite the dtypes in test data by those in the training data + X = X.astype(self.dtypes) + else: # Calling for the first time to infer the categories X = X.infer_objects() - for column in X.columns: - if not is_numeric_dtype(X[column]): - X[column] = X[column].astype('category') - self.object_dtype_mapping = {column: X[column].dtype for column in X.columns} - self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") + cat_dtypes = {col: 'category' for col, dtype in zip(X.columns, X.dtypes) if not is_numeric_dtype(dtype)} + X = X.astype(cat_dtypes) + + self.dtypes.update({col: dtype.name for col, dtype in zip(X.columns, X.dtypes)}) + self.logger.debug(f"New dtypes of data: {self.dtypes}") + self._called_infer_object = True + return X diff --git a/autoPyTorch/data/tabular_target_validator.py b/autoPyTorch/data/tabular_target_validator.py index 22cabb999..693a24cae 100644 --- a/autoPyTorch/data/tabular_target_validator.py +++ b/autoPyTorch/data/tabular_target_validator.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union, cast +from typing import List, Optional, cast import numpy as np @@ -7,24 +7,16 @@ from scipy.sparse import issparse, spmatrix -import sklearn.utils from sklearn import preprocessing from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from sklearn.utils.multiclass import type_of_target from autoPyTorch.data.base_target_validator import BaseTargetValidator, SupportedTargetTypes +from autoPyTorch.data.utils import ArrayType, _check_and_to_array from autoPyTorch.utils.common import ispandas -ArrayType = Union[np.ndarray, spmatrix] - - -def _check_and_to_array(y: SupportedTargetTypes) -> ArrayType: - """ sklearn check array will make sure we have the correct numerical features for the array """ - return sklearn.utils.check_array(y, force_all_finite=True, accept_sparse='csr', ensure_2d=False) - - def _modify_regression_target(y: ArrayType) -> ArrayType: # Regression targets must have numbers after a decimal point. 
# Ref: https://github.com/scikit-learn/scikit-learn/issues/8952
@@ -124,8 +116,9 @@

         return self

     def _transform_by_encoder(self, y: SupportedTargetTypes) -> np.ndarray:
+        kwargs = dict(force_all_finite=True, ensure_2d=False)
         if self.encoder is None:
-            return _check_and_to_array(y)
+            return _check_and_to_array(y, **kwargs)

         # remove ravel warning from pandas Series
         shape = np.shape(y)
@@ -139,7 +132,7 @@

         else:
             y = self.encoder.transform(np.array(y).reshape(-1, 1)).reshape(-1)

-        return _check_and_to_array(y)
+        return _check_and_to_array(y, **kwargs)

     def transform(self, y: SupportedTargetTypes) -> np.ndarray:
         """
diff --git a/autoPyTorch/data/utils.py b/autoPyTorch/data/utils.py
index 03375ce27..40dc7aa11 100644
--- a/autoPyTorch/data/utils.py
+++ b/autoPyTorch/data/utils.py
@@ -1,5 +1,6 @@
 # Implementation used from https://github.com/automl/auto-sklearn/blob/development/autosklearn/util/data.py
 import warnings
+from logging import Logger
 from math import floor
 from typing import (
     Any,
@@ -18,11 +19,18 @@

 import numpy as np

 import pandas as pd
+from pandas.api.types import is_numeric_dtype

 from scipy.sparse import issparse, spmatrix

-from autoPyTorch.utils.common import ispandas
+from sklearn.utils import check_array

+from autoPyTorch.utils.common import autoPyTorchEnum, ispandas
+
+
+ArrayType = Union[np.ndarray, spmatrix]
+SupportedFeatTypes = Union[List, pd.DataFrame, np.ndarray, spmatrix]
+SupportedTargetTypes = Union[List, pd.Series, pd.DataFrame, np.ndarray, spmatrix]

 # TODO: TypedDict with python 3.8
 #
@@ -39,6 +47,188 @@

 }


+class ColumnDTypes(autoPyTorchEnum):
+    numerical = "numerical"
+    categorical = "categorical"
+
+
+def convert_dtype_enum_dict_to_str_dict(dtype_dict: Dict[str, ColumnDTypes]) -> Dict[str, str]:
+    enum2str = {type_choice: str(type_choice) for type_choice in ColumnDTypes}
+    return {col_name: enum2str[dtype_choice] for col_name, dtype_choice in dtype_dict.items()}
+
+
+def list_to_pandas(data: List, logger: Optional[Logger] = None) -> pd.DataFrame:
+    """
+    Convert a list to a pandas DataFrame. In this process, column types are inferred.
+
+    Args:
+        data (List):
+            A list of features.
+
+    Returns:
+        pd.DataFrame:
+            transformed data from list to pandas DataFrame
+    """
+    if not isinstance(data, list):
+        raise TypeError(f"data must be a list, but got {type(data)}")
+
+    # If a list was provided, it will be converted to pandas
+    data = pd.DataFrame(data=data).infer_objects()
+    data_info = [(col, t) for col, t in zip(data.columns, data.dtypes)]
+
+    if logger is not None:
+        logger.warning(
+            "The features provided to AutoPyTorch are a list. "
+            f"They have been interpreted as: {data_info}"
+        )
+
+    return data
+
+
+def numpy_to_pandas(data: np.ndarray) -> pd.DataFrame:
+    """
+    Converts a numpy array to pandas for type inference
+
+    Args:
+        data (np.ndarray):
+            The data to be interpreted. 
+
+    Returns:
+        pd.DataFrame
+    """
+    if not isinstance(data, np.ndarray):
+        raise TypeError(f"data must be np.ndarray, but got {type(data)}")
+
+    return pd.DataFrame(data).infer_objects().convert_dtypes()
+
+
+def to_pandas(data: SupportedFeatTypes, logger: Optional[Logger] = None) -> SupportedFeatTypes:
+    if isinstance(data, list):
+        data = list_to_pandas(data, logger)
+    elif isinstance(data, np.ndarray):
+        data = numpy_to_pandas(data)
+
+    return data
+
+
+def has_object_columns(feature_types: pd.Series) -> bool:
+    """
+    Indicate whether a Series of dtypes for a pandas DataFrame
+    contains one or more object columns.
+    Args:
+        feature_types (pd.Series): The feature types for a DataFrame.
+    Returns:
+        bool:
+            True if the DataFrame dtypes contain an object column, False
+            otherwise.
+    """
+    return np.dtype('O') in feature_types
+
+
+def _check_and_to_array(
+    data: Union[SupportedFeatTypes, SupportedTargetTypes],
+    logger: Optional[Logger] = None,
+    **kwargs: Any
+) -> ArrayType:
+    """sklearn's check_array makes sure the array contains the correct numerical features"""
+    _kwargs = dict(accept_sparse='csr', force_all_finite=False)
+    _kwargs.update(kwargs)
+    try:
+        return check_array(data, **_kwargs)
+    except Exception as e:
+        if logger is not None:
+            logger.exception(
+                f"Conversion failed for input {data}. "
+                "This means AutoPyTorch was not able to properly "
+                "extract the dtypes of the provided input features. "
+                "Please try to manually cast it to a supported "
+                "numerical or categorical type."
+            )
+        raise e
+
+
+def _error_due_to_unsupported_column(X: pd.DataFrame, column: str) -> None:
+    # Move away from np.issubdtype as it causes
+    # TypeError: data type not understood in certain pandas types
+    def _generate_error_message_prefix(type_name: str, proc_type: Optional[str] = None) -> str:
+        msg1 = f"Column `{column}` has an invalid type `{type_name}`. "
+        msg2 = "Cast it to a numerical, category, or bool type via the astype method. 
" + msg3 = f"The following link might help you to know {proc_type} processing: " + return msg1 + msg2 + ("" if proc_type is None else msg3) + + dtype = X[column].dtype + if dtype.name == 'object': + err_msg = _generate_error_message_prefix(type_name="object", proc_type="string") + url = "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html" + raise TypeError(f"{err_msg}{url}") + elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(dtype): + err_msg = _generate_error_message_prefix(type_name="time and/or date datatype", proc_type="datetime") + raise TypeError(f"{err_msg}https://stats.stackexchange.com/questions/311494/") + else: + err_msg = _generate_error_message_prefix(type_name=dtype.name) + raise TypeError(err_msg) + + +def _get_columns_to_encode(X: pd.DataFrame) -> Tuple[List[str], Dict[str, str]]: + """ + In case input data is a pandas DataFrame, this utility encodes the user provided + features (from categorical for example) to a numerical value that further stages + will be able to use + + Args: + X (pd.DataFrame): + A set of features that are going to be validated (type and dimensionality + checks) and an encoder fitted in the case the data needs encoding + + Returns: + enc_columns (List[str]): + Columns to encode + feat_type (Dict[str, str]): + Whether each column is numerical or categorical + """ + enc_columns: List[str] = [] + # feat_type: Dict[str, str] = {} + feat_type: List[str] = [] + + for dtype, col in zip(X.dtypes, X.columns): + if dtype.name in ['category', 'bool']: + enc_columns.append(col) + # feat_type[col] = str(ColumnDTypes.categorical) + feat_type.append(str(ColumnDTypes.categorical)) + elif is_numeric_dtype(dtype): + # feat_type[col] = str(ColumnDTypes.numerical) + feat_type.append(str(ColumnDTypes.numerical)) + else: + _error_due_to_unsupported_column(X, col) + + return enc_columns, feat_type + + +def _categorical_left_mover(cmp1: str, cmp2: str) -> int: + """Order so that categorical columns come left and numerical columns come right + + Args: + cmp1 (str): First variable to compare + cmp2 (str): Second variable to compare + + Raises: + ValueError: if the values of the variables to compare + are not in 'categorical' or 'numerical' + + Returns: + int: either [0, -1, 1] + """ + choices = [str(ColumnDTypes.categorical), str(ColumnDTypes.numerical)] + if cmp1 not in choices or cmp2 not in choices: + raise ValueError( + f"The comparator for the column order only accepts {choices}, " + f"but got {cmp1} and {cmp2}" + ) + + idx1, idx2 = choices.index(cmp1), choices.index(cmp2) + return idx1 - idx2 + + def get_dataset_compression_mapping( memory_limit: int, dataset_compression: Union[bool, Mapping[str, Any]] diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py index 48302bdee..23d3908e7 100644 --- a/autoPyTorch/utils/common.py +++ b/autoPyTorch/utils/common.py @@ -101,6 +101,9 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(self.value) + def __str__(self) -> str: + return str(self.value) + def custom_collate_fn(batch: List) -> List[Optional[torch.Tensor]]: """ diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 3d352d765..c0d497ad9 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -220,7 +220,7 @@ def test_featurevalidator_supported_types(input_data_featuretest): ) def test_featurevalidator_unsupported_numpy(input_data_featuretest): validator = TabularFeatureValidator() - with 
pytest.raises(ValueError, match=r".*When providing a numpy array.*not supported."):
+    with pytest.raises(TypeError, match=r"AutoPyTorch does not support numpy.ndarray with non-numerical dtype"):
         validator.fit(input_data_featuretest)


@@ -328,13 +328,11 @@ def test_features_unsupported_calls_are_raised():
         expected
     """
     validator = TabularFeatureValidator()
-    with pytest.raises(ValueError, match=r"AutoPyTorch does not support time"):
-        validator.fit(
-            pd.DataFrame({'datetime': [pd.Timestamp('20180310')]})
-        )
-    with pytest.raises(ValueError, match=r"AutoPyTorch only supports.*yet, the provided input"):
+    with pytest.raises(TypeError, match=r"invalid type `time and/or date datatype`."):
+        validator.fit(pd.DataFrame({'datetime': [pd.Timestamp('20180310')]}))
+    with pytest.raises(TypeError, match=r"AutoPyTorch only supports numpy.ndarray, pandas.DataFrame"):
         validator.fit({'input1': 1, 'input2': 2})
-    with pytest.raises(ValueError, match=r"has unsupported dtype string"):
+    with pytest.raises(TypeError, match=r"invalid type `string`."):
         validator.fit(pd.DataFrame([{'A': 1, 'B': 2}], dtype='string'))
     with pytest.raises(ValueError, match=r"The feature dimensionality of the train and test"):
         validator.fit(X_train=np.array([[1, 2, 3], [4, 5, 6]]),
@@ -517,15 +515,16 @@ def test_featurevalidator_new_data_after_fit(openml_id,

     # And then check proper error messages
+    pattern = r"of the features must be identical before/after fit\(\)"
     if train_data_type == 'pandas':
         old_dtypes = copy.deepcopy(validator.dtypes)
         validator.dtypes = ['dummy' for dtype in X_train.dtypes]
-        with pytest.raises(ValueError, match=r"Changing the dtype of the features after fit"):
+        with pytest.raises(ValueError, match=pattern):
             transformed_X = validator.transform(X_test)
         validator.dtypes = old_dtypes
     if test_data_type == 'pandas':
         columns = X_test.columns.tolist()
         X_test = X_test[reversed(columns)]
-        with pytest.raises(ValueError, match=r"Changing the column order of the features"):
+        with pytest.raises(ValueError, match=pattern):
             transformed_X = validator.transform(X_test)
diff --git a/test/test_data/test_validation.py b/test/test_data/test_validation.py
index 482c99769..cc89f5276 100644
--- a/test/test_data/test_validation.py
+++ b/test/test_data/test_validation.py
@@ -103,7 +103,7 @@ def test_sparse_data_validation_for_regression():

     validator.fit(X_train=X_sp, y_train=y)

-    X_t, y_t = validator.transform(X, y)
+    X_t, y_t = validator.transform(X_sp, y)
     assert np.shape(X) == np.shape(X_t)

     # make sure everything was encoded to number
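
Reviewer note (not part of the patch): a minimal sketch to sanity-check the worked example added to the new `transform` docstring and the categorical-left ordering of `_categorical_left_mover`. It uses a plain SimpleImputer + OneHotEncoder pipeline as a stand-in assumption; the validator actually builds its transformer via get_tabular_preprocessors(), which this patch does not show.

# Sketch only: the imputer/encoder combination below is assumed for
# illustration and is not the pipeline autoPyTorch constructs internally.
import functools

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder

# Categorical columns sort to the left, mirroring _categorical_left_mover.
choices = ['categorical', 'numerical']
feat_type = ['numerical', 'categorical', 'numerical', 'categorical']
ordered = sorted(feat_type,
                 key=functools.cmp_to_key(lambda c1, c2: choices.index(c1) - choices.index(c2)))
assert ordered == ['categorical', 'categorical', 'numerical', 'numerical']

# The docstring example: column C is all-NaN, so the validator converts it to
# a numeric column and the encoder only ever sees A and B.
data = pd.DataFrame([
    {'A': 1, 'B': np.nan},
    {'A': np.nan, 'B': 3},
    {'A': 2, 'B': np.nan},
])
enc = make_pipeline(
    SimpleImputer(strategy='constant', fill_value=-1),  # NaN becomes its own value
    OneHotEncoder(),  # categories are sorted, so the NaN placeholder takes index 0
)
print(enc.fit_transform(data).toarray())
# [[0. 1. 0. 1. 0.]
#  [1. 0. 0. 0. 1.]
#  [0. 0. 1. 1. 0.]]

The printed matrix matches the concatenated encoding table in the docstring, which supports the claim that the NaN placeholder always occupies index 0 of each one-hot group.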