From eade3871d8bde91da79b64950cb8d3419a7f4e3c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 3 Aug 2021 18:35:46 +0200 Subject: [PATCH 01/54] preprocess inside data validator --- autoPyTorch/data/tabular_feature_validator.py | 234 +++++++----------- .../TabularColumnTransformer.py | 16 +- .../encoding/base_encoder.py | 2 +- .../imputation/base_imputer.py | 2 +- .../scaling/base_scaler.py | 2 +- .../base_network_embedding.py | 33 +-- 6 files changed, 120 insertions(+), 169 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 4c8a8fbc2..69ff55fa5 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -9,14 +9,62 @@ import scipy.sparse import sklearn.utils -from sklearn import preprocessing + from sklearn.base import BaseEstimator from sklearn.compose import ColumnTransformer from sklearn.exceptions import NotFittedError +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder +from sklearn.impute import SimpleImputer +from sklearn.preprocessing import StandardScaler from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES +def _create_column_transformer(preprocessors: typing.Dict, numerical_columns, categorical_columns): + numerical_pipeline = 'drop' + categorical_pipeline = 'drop' + if len(numerical_columns) > 0: + numerical_pipeline = make_pipeline(*preprocessors['numerical']) + if len(categorical_columns) > 0: + categorical_pipeline = make_pipeline(*preprocessors['categorical']) + + return ColumnTransformer([ + ('categorical_pipeline', categorical_pipeline, categorical_columns), + ('numerical_pipeline', numerical_pipeline, numerical_columns)], + remainder='passthrough' + ) + + +def get_tabular_preprocessors(): + preprocessors = dict() + preprocessors['numerical'] = list() + preprocessors['categorical'] = list() + + preprocessors['categorical'].append(SimpleImputer(strategy='constant', + # Train data is numpy + # as of this point, where + # Ordinal Encoding is using + # for categorical. Only + # Numbers are allowed + # fill_value='!missing!', + fill_value=-1, + copy=False)) + + # preprocessors['categorical'].append(("ordinal-encoder", OrdinalEncoder( + # handle_unknown='use_encoded_value', + # unknown_value=-1))) + preprocessors['categorical'].append(OneHotEncoder( + categories='auto', + sparse=False, + handle_unknown='ignore')) + preprocessors['numerical'].append(SimpleImputer(strategy='median', + copy=False)) + preprocessors['numerical'].append(StandardScaler(with_mean=True, with_std=True, copy=False)) + + return preprocessors + + class TabularFeatureValidator(BaseFeatureValidator): def _fit( self, @@ -43,73 +91,50 @@ def _fit( if hasattr(X, "iloc") and not scipy.sparse.issparse(X): X = typing.cast(pd.DataFrame, X) - # Treat a column with all instances a NaN as numerical - # This will prevent doing encoding to a categorical column made completely - # out of nan values -- which will trigger a fail, as encoding is not supported - # with nan values. 
- # Columns that are completely made of NaN values are provided to the pipeline - # so that later stages decide how to handle them - - # Clear whatever null column markers we had previously - self.null_columns.clear() - if np.any(pd.isnull(X)): - for column in X.columns: - if X[column].isna().all(): - self.null_columns.add(column) - X[column] = pd.to_numeric(X[column]) - # Also note this change in self.dtypes - if len(self.dtypes) != 0: - self.dtypes[list(X.columns).index(column)] = X[column].dtype if not X.select_dtypes(include='object').empty: X = self.infer_objects(X) self._check_data(X) - self.enc_columns, self.feat_type = self._get_columns_to_encode(X) - - if len(self.enc_columns) > 0: - X = self.impute_nan_in_categories(X) - - self.encoder = ColumnTransformer( - [ - ("encoder", - preprocessing.OrdinalEncoder( - handle_unknown='use_encoded_value', - unknown_value=-1, - ), self.enc_columns)], - remainder="passthrough" - ) + categorical_columns, numerical_columns, feat_type = self._get_columns_info(X) + + preprocessors = get_tabular_preprocessors() + self.column_transformer = _create_column_transformer(preprocessors=preprocessors, + numerical_columns=numerical_columns, + categorical_columns=categorical_columns) + + # Mypy redefinition + assert self.column_transformer is not None + self.column_transformer.fit(X) + + # The column transformer reoders the feature types - we therefore need to change + # it as well + # This means columns are shifted to the right + def comparator(cmp1: str, cmp2: str) -> int: + if ( + cmp1 == 'categorical' and cmp2 == 'categorical' + or cmp1 == 'numerical' and cmp2 == 'numerical' + ): + return 0 + elif cmp1 == 'categorical' and cmp2 == 'numerical': + return -1 + elif cmp1 == 'numerical' and cmp2 == 'categorical': + return 1 + else: + raise ValueError((cmp1, cmp2)) - # Mypy redefinition - assert self.encoder is not None - self.encoder.fit(X) - - # The column transformer reoders the feature types - we therefore need to change - # it as well - # This means columns are shifted to the right - def comparator(cmp1: str, cmp2: str) -> int: - if ( - cmp1 == 'categorical' and cmp2 == 'categorical' - or cmp1 == 'numerical' and cmp2 == 'numerical' - ): - return 0 - elif cmp1 == 'categorical' and cmp2 == 'numerical': - return -1 - elif cmp1 == 'numerical' and cmp2 == 'categorical': - return 1 - else: - raise ValueError((cmp1, cmp2)) - - self.feat_type = sorted( - self.feat_type, - key=functools.cmp_to_key(comparator) - ) + self.feat_type = sorted( + feat_type, + key=functools.cmp_to_key(comparator) + ) + if len(categorical_columns) > 0: + print(self.column_transformer.named_transformers_['categorical_pipeline'].named_steps) self.categories = [ # We fit an ordinal encoder, where all categorical # columns are shifted to the left list(range(len(cat))) - for cat in self.encoder.transformers_[0][1].categories_ + for cat in self.column_transformer.named_transformers_['categorical_pipeline'].named_steps['onehotencoder'].categories_ ] for i, type_ in enumerate(self.feat_type): @@ -151,23 +176,6 @@ def transform( if hasattr(X, "iloc") and not scipy.sparse.issparse(X): X = typing.cast(pd.DataFrame, X) - # If we had null columns in our fit call and we made them numeric, then: - # - If the columns are null even in transform, apply the same procedure. - # - Otherwise, substitute the values with np.NaN and then make the columns numeric. - # If the column is null here, but it was not in fit, it does not matter. 
- for column in self.null_columns: - # The column is not null, make it null since it was null in fit. - if not X[column].isna().all(): - X[column] = np.NaN - X[column] = pd.to_numeric(X[column]) - - # for the test set, if we have columns with only null values - # they will probably have a numeric type. If these columns were not - # with only null values in the train set, they should be converted - # to the type that they had during fitting. - for column in X.columns: - if X[column].isna().all(): - X[column] = X[column].astype(self.dtypes[list(X.columns).index(column)]) # Also remove the object dtype for new data if not X.select_dtypes(include='object').empty: @@ -177,10 +185,7 @@ def transform( self._check_data(X) # We also need to fillna on the transformation # in case test data is provided - X = self.impute_nan_in_categories(X) - - if self.encoder is not None: - X = self.encoder.transform(X) + X = self.column_transformer.transform(X) # Sparse related transformations # Not all sparse format support index sorting @@ -254,7 +259,7 @@ def _check_data( # Define the column to be encoded here as the feature validator is fitted once # per estimator - enc_columns, _ = self._get_columns_to_encode(X) + # enc_columns, _ = self._get_columns_to_encode(X) column_order = [column for column in X.columns] if len(self.column_order) > 0: @@ -279,10 +284,10 @@ def _check_data( else: self.dtypes = dtypes - def _get_columns_to_encode( + def _get_columns_info( self, X: pd.DataFrame, - ) -> typing.Tuple[typing.List[str], typing.List[str]]: + ) -> typing.Tuple[typing.List[str], typing.List[str], typing.List[str]]: """ Return the columns to be encoded from a pandas dataframe @@ -297,8 +302,8 @@ def _get_columns_to_encode( Type of each column numerical/categorical """ # Register if a column needs encoding - enc_columns = [] - + numerical_columns = [] + categorical_columns = [] # Also, register the feature types for the estimator feat_type = [] @@ -306,7 +311,7 @@ def _get_columns_to_encode( for i, column in enumerate(X.columns): if X[column].dtype.name in ['category', 'bool']: - enc_columns.append(column) + categorical_columns.append(column) feat_type.append('categorical') # Move away from np.issubdtype as it causes # TypeError: data type not understood in certain pandas types @@ -348,7 +353,8 @@ def _get_columns_to_encode( ) else: feat_type.append('numerical') - return enc_columns, feat_type + numerical_columns.append(column) + return categorical_columns, numerical_columns, feat_type def list_to_dataframe( self, @@ -432,60 +438,4 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: X[column] = X[column].astype('category') self.object_dtype_mapping = {column: X[column].dtype for column in X.columns} self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") - return X - - def impute_nan_in_categories(self, X: pd.DataFrame) -> pd.DataFrame: - """ - impute missing values before encoding, - remove once sklearn natively supports - it in ordinal encoding. Sklearn issue: - "https://github.com/scikit-learn/scikit-learn/issues/17123)" - - Arguments: - X (pd.DataFrame): - data to be interpreted. 
- - Returns: - pd.DataFrame - """ - - # To be on the safe side, map always to the same missing - # value per column - if not hasattr(self, 'dict_nancol_to_missing'): - self.dict_missing_value_per_col: typing.Dict[str, typing.Any] = {} - - # First make sure that we do not alter the type of the column which cause: - # TypeError: '<' not supported between instances of 'int' and 'str' - # in the encoding - for column in self.enc_columns: - if X[column].isna().any(): - if column not in self.dict_missing_value_per_col: - try: - float(X[column].dropna().values[0]) - can_cast_as_number = True - except Exception: - can_cast_as_number = False - if can_cast_as_number: - # In this case, we expect to have a number as category - # it might be string, but its value represent a number - missing_value: typing.Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], - str) else -1 - else: - missing_value = 'Missing!' - - # Make sure this missing value is not seen before - # Do this check for categorical columns - # else modify the value - if hasattr(X[column], 'cat'): - while missing_value in X[column].cat.categories: - if isinstance(missing_value, str): - missing_value += '0' - else: - missing_value += missing_value - self.dict_missing_value_per_col[column] = missing_value - - # Convert the frame in place - X[column].cat.add_categories([self.dict_missing_value_per_col[column]], - inplace=True) - X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True) - return X + return X \ No newline at end of file diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index e1e08e94e..46feb0ac4 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -48,14 +48,14 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": "TabularColumnTransformer": an instance of self """ self.check_requirements(X, y) - numerical_pipeline = 'drop' - categorical_pipeline = 'drop' - - preprocessors = get_tabular_preprocessers(X) - if len(X['dataset_properties']['numerical_columns']): - numerical_pipeline = make_pipeline(*preprocessors['numerical']) - if len(X['dataset_properties']['categorical_columns']): - categorical_pipeline = make_pipeline(*preprocessors['categorical']) + numerical_pipeline = 'passthrough' + categorical_pipeline = 'passthrough' + + # preprocessors = get_tabular_preprocessers(X) + # if len(X['dataset_properties']['numerical_columns']): + # numerical_pipeline = make_pipeline(*preprocessors['numerical']) + # if len(X['dataset_properties']['categorical_columns']): + # categorical_pipeline = make_pipeline(*preprocessors['categorical']) self.preprocessor = ColumnTransformer([ ('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns']), diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py index eadc0a188..9829cadcd 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py @@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if 
self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." .format(self.__class__.__name__)) - X.update({'encoder': self.preprocessor}) + # X.update({'encoder': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py index b65f3c229..ac0648481 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py @@ -29,5 +29,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." .format(self.__class__.__name__)) - X.update({'imputer': self.preprocessor}) + # X.update({'imputer': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py index 39834dd2b..270fac246 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py @@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: raise ValueError("cant call transform on {} without fitting first." .format(self.__class__.__name__)) - X.update({'scaler': self.preprocessor}) + # X.update({'scaler': self.preprocessor}) return X diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py index 5ae2880ed..42cbc62bb 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py @@ -33,19 +33,20 @@ def build_embedding(self, num_input_features: np.ndarray, num_numerical_features def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]: # Feature preprocessors can alter numerical columns - if len(X['dataset_properties']['numerical_columns']) == 0: - num_numerical_columns = 0 - else: - X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) - - numerical_column_transformer = X['tabular_transformer'].preprocessor. \ - named_transformers_['numerical_pipeline'] - num_numerical_columns = numerical_column_transformer.transform( - X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] - num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), - dtype=int) - categories = X['dataset_properties']['categories'] - - for i, category in enumerate(categories): - num_input_features[num_numerical_columns + i, ] = len(category) - return num_numerical_columns, num_input_features + # if len(X['dataset_properties']['numerical_columns']) == 0: + # num_numerical_columns = 0 + # else: + # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) + # + # numerical_column_transformer = X['tabular_transformer'].preprocessor. 
\ + # named_transformers_['numerical_pipeline'] + # num_numerical_columns = numerical_column_transformer.transform( + # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] + # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), + # dtype=int) + # categories = X['dataset_properties']['categories'] + # + # for i, category in enumerate(categories): + # num_input_features[num_numerical_columns + i, ] = len(category) + # return num_numerical_columns, num_input_features + return None, None \ No newline at end of file From b76b05e57ff0d383c2e0e0b602386407ea8c391b Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 3 Aug 2021 18:40:32 +0200 Subject: [PATCH 02/54] add time debug statements --- .../tabular_preprocessing/TabularColumnTransformer.py | 7 ++++++- .../pipeline/components/training/trainer/base_trainer.py | 7 +++++++ .../components/training/trainer/base_trainer_choice.py | 4 +++- .../pipeline/components/training/trainer/cutout_utils.py | 2 ++ .../pipeline/components/training/trainer/mixup_utils.py | 2 ++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index 46feb0ac4..5fcf5cfb5 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -4,7 +4,7 @@ from sklearn.compose import ColumnTransformer from sklearn.pipeline import make_pipeline - +import time import torch from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( @@ -23,6 +23,7 @@ def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = N self.add_fit_requirements([ FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True), FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True)]) + self.fit_time = None def get_column_transformer(self) -> ColumnTransformer: """ @@ -47,6 +48,8 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": Returns: "TabularColumnTransformer": an instance of self """ + start_time = time.time() + self.check_requirements(X, y) numerical_pipeline = 'passthrough' categorical_pipeline = 'passthrough' @@ -71,6 +74,8 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": X_train = X['backend'].load_datamanager().train_tensors[0] self.preprocessor.fit(X_train) + self.fit_time = time.time() - start_time + return self def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index b77bb729a..934c6c315 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -221,6 +221,8 @@ def __init__(self, weighted_loss: int = 0, self.add_fit_requirements([ FitRequirement("is_cyclic_scheduler", (bool,), user_defined=False, dataset_property=False), ]) + self.batch_fit_times = [] + self.data_loading_times = [] def prepare( self, @@ -363,12 +365,16 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, outputs_data = list() targets_data = list() + 
batch_load_start_time = time.time() for step, (data, targets) in enumerate(train_loader): + self.data_loading_times.append(time.time() - batch_load_start_time) + batch_train_start = time.time() if self.budget_tracker.is_max_time_reached(): break loss, outputs = self.train_step(data, targets) + self.batch_fit_times.append(time.time() - batch_train_start) # save for metric evaluation outputs_data.append(outputs.detach().cpu()) targets_data.append(targets.detach().cpu()) @@ -383,6 +389,7 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, loss, epoch * len(train_loader) + step, ) + batch_load_start_time = time.time() if self.scheduler: if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__: diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 502445c14..27c64461e 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -77,6 +77,7 @@ def __init__(self, (torch.utils.data.DataLoader,), user_defined=False, dataset_property=False)] self.checkpoint_dir = None # type: Optional[str] + self.fit_time = None def get_fit_requirements(self) -> Optional[List[FitRequirement]]: return self._fit_requirements @@ -263,6 +264,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom Returns: A instance of self """ + start_time = time.time() # Make sure that the prerequisites are there self.check_requirements(X, y) @@ -285,7 +287,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice) if self.choice.use_snapshot_ensemble: X['network_snapshots'].extend(self.choice.model_snapshots) - + self.fit_time = time.time() - start_time return self.choice def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py index c7feb2214..c58546a4c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py @@ -53,6 +53,8 @@ def __init__(self, patch_ratio: float, self.lookahead_config = lookahead_config self.patch_ratio = patch_ratio self.cutout_prob = cutout_prob + self.batch_fit_times = [] + self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py index a2325b91c..b1cf37972 100644 --- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py @@ -51,6 +51,8 @@ def __init__(self, alpha: float, f'{Lookahead.__name__}:la_alpha': 0.6} self.lookahead_config = lookahead_config self.alpha = alpha + self.batch_fit_times = [] + self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: From bbf9b07f6d48a1fd1441a4992f86c068c11b197a Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 3 Aug 2021 19:50:09 +0200 Subject: [PATCH 03/54] Add fixes for categorical data --- autoPyTorch/data/tabular_feature_validator.py | 76 +++++++++++++++++-- 1 
file changed, 68 insertions(+), 8 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 69ff55fa5..16185817b 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -16,7 +16,7 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import OneHotEncoder from sklearn.impute import SimpleImputer -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import StandardScaler, OrdinalEncoder from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES @@ -41,6 +41,10 @@ def get_tabular_preprocessors(): preprocessors['numerical'] = list() preprocessors['categorical'] = list() + preprocessors['categorical'].append(OneHotEncoder( + categories='auto', + sparse=False, + handle_unknown='ignore')) preprocessors['categorical'].append(SimpleImputer(strategy='constant', # Train data is numpy # as of this point, where @@ -51,13 +55,10 @@ def get_tabular_preprocessors(): fill_value=-1, copy=False)) - # preprocessors['categorical'].append(("ordinal-encoder", OrdinalEncoder( - # handle_unknown='use_encoded_value', - # unknown_value=-1))) - preprocessors['categorical'].append(OneHotEncoder( - categories='auto', - sparse=False, - handle_unknown='ignore')) + preprocessors['categorical'].append(OrdinalEncoder( + handle_unknown='use_encoded_value', + unknown_value=-1)) + preprocessors['numerical'].append(SimpleImputer(strategy='median', copy=False)) preprocessors['numerical'].append(StandardScaler(with_mean=True, with_std=True, copy=False)) @@ -98,6 +99,9 @@ def _fit( self._check_data(X) categorical_columns, numerical_columns, feat_type = self._get_columns_info(X) + self.enc_columns = categorical_columns + if len(categorical_columns) >= 0: + X = self.impute_nan_in_categories(X) preprocessors = get_tabular_preprocessors() self.column_transformer = _create_column_transformer(preprocessors=preprocessors, numerical_columns=numerical_columns, @@ -185,6 +189,8 @@ def transform( self._check_data(X) # We also need to fillna on the transformation # in case test data is provided + if len(self.categorical_columns) >= 0: + X = self.impute_nan_in_categories(X) X = self.column_transformer.transform(X) # Sparse related transformations @@ -438,4 +444,58 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: X[column] = X[column].astype('category') self.object_dtype_mapping = {column: X[column].dtype for column in X.columns} self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") + return X + + def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> pd.DataFrame: + """ + impute missing values before encoding, + remove once sklearn natively supports + it in ordinal encoding. Sklearn issue: + "https://github.com/scikit-learn/scikit-learn/issues/17123)" + Arguments: + X (pd.DataFrame): + data to be interpreted. 
+ Returns: + pd.DataFrame + """ + + # To be on the safe side, map always to the same missing + # value per column + if not hasattr(self, 'dict_nancol_to_missing'): + self.dict_missing_value_per_col: typing.Dict[str, typing.Any] = {} + + # First make sure that we do not alter the type of the column which cause: + # TypeError: '<' not supported between instances of 'int' and 'str' + # in the encoding + for column in self.enc_columns: + if X[column].isna().any(): + if column not in self.dict_missing_value_per_col: + try: + float(X[column].dropna().values[0]) + can_cast_as_number = True + except Exception: + can_cast_as_number = False + if can_cast_as_number: + # In this case, we expect to have a number as category + # it might be string, but its value represent a number + missing_value: typing.Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], + str) else -1 + else: + missing_value = 'Missing!' + + # Make sure this missing value is not seen before + # Do this check for categorical columns + # else modify the value + if hasattr(X[column], 'cat'): + while missing_value in X[column].cat.categories: + if isinstance(missing_value, str): + missing_value += '0' + else: + missing_value += missing_value + self.dict_missing_value_per_col[column] = missing_value + + # Convert the frame in place + X[column].cat.add_categories([self.dict_missing_value_per_col[column]], + inplace=True) + X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True) return X \ No newline at end of file From 99d74077fc9a145c1dd9b518fff439589f8b013c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 5 Aug 2021 16:00:21 +0200 Subject: [PATCH 04/54] add fit_ensemble --- autoPyTorch/api/base_task.py | 164 ++++++++++++++++++++++++++++++----- autoPyTorch/utils/backend.py | 4 +- 2 files changed, 146 insertions(+), 22 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 11b0de273..c2e220875 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -189,6 +189,9 @@ def __init__( self.trajectory: Optional[List] = None self.dataset_name: Optional[str] = None self.cv_models_: Dict = {} + self.precision: Optional[int] = None + self.opt_metric: Optional[str] = None + self.dataset: Optional[BaseDataset] = None # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT @@ -936,26 +939,12 @@ def _search( self._logger.info("Starting ensemble") ensemble_task_name = 'ensemble' self._stopwatch.start_task(ensemble_task_name) - proc_ensemble = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=time_left_for_ensembles, - backend=copy.deepcopy(self._backend), - dataset_name=str(dataset.dataset_name), - output_type=STRING_TO_OUTPUT_TYPES[dataset.output_type], - task_type=STRING_TO_TASK_TYPES[self.task_type], - metrics=[self._metric], - opt_metric=optimize_metric, - ensemble_size=self.ensemble_size, - ensemble_nbest=self.ensemble_nbest, - max_models_on_disc=self.max_models_on_disc, - seed=self.seed, - max_iterations=None, - read_at_most=sys.maxsize, - ensemble_memory_limit=self._memory_limit, - random_state=self.seed, - precision=precision, - logger_port=self._logger_port, - ) + proc_ensemble = self._init_ensemble_builder(time_left_for_ensembles=time_left_for_ensembles, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + precision=precision, + optimize_metric=self.opt_metric + ) self._stopwatch.stop_task(ensemble_task_name) # ==> Run SMAC @@ 
-1333,6 +1322,141 @@ def fit_pipeline(self, return fitted_pipeline, run_info, run_value, dataset + def fit_ensemble( + self, + ensemble_nbest: int = 50, + ensemble_size: int = 50, + precision: int = 32, + load_models: bool = True + ) -> 'BaseTask': + """ + Enables post-hoc fitting of the ensemble after the `search()` + method is finished. This method creates an ensemble using all + the models stored on disk during the smbo run + Args: + ensemble_nbest (Optional[int]): + only consider the ensemble_nbest models to build the ensemble. + If None, uses the value stored in class attribute `ensemble_nbest`. + ensemble_size (int) (default=50): + Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either 16, 32 or 64. + Returns: + self + """ + # Make sure that input is valid + if self.dataset is None or self.opt_metric is None: + raise ValueError("fit_ensemble() can only be called after `search()`. " + "Please call the `search()` method of {} prior to " + "fit_ensemble().".format(self.__class__.__name__)) + + if self._logger is None: + self._logger = self._get_logger(self.dataset.dataset_name) + + # Create a client if needed + if self._dask_client is None: + self._create_dask_client() + else: + self._is_dask_client_internally_created = False + + manager = self._init_ensemble_builder( + time_left_for_ensembles=self._time_for_task, + optimize_metric=self.opt_metric, + precision=precision, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + ) + + manager.build_ensemble(self._dask_client) + future = manager.futures.pop() + result = future.result() + if result is None: + raise ValueError("Errors occurred while building the ensemble - please" + " check the log file and command line output for error messages.") + self.ensemble_performance_history, _, _, _ = result + + if load_models: + self._load_models() + if self._logger is not None: + self._logger.info("Closing the dask infrastructure") + self._close_dask_client() + self._logger.info("Finished closing the dask infrastructure") + + # Clean up the logger + self._logger.info("Starting to clean up the logger") + self._clean_logger() + else: + self._close_dask_client() + + return self + + def _init_ensemble_builder( + self, + time_left_for_ensembles: float, + optimize_metric: str, + ensemble_nbest: int, + ensemble_size: int, + precision: int = 32, + ) -> EnsembleBuilderManager: + """ + Initializes an `EnsembleBuilderManager`. + Args: + time_left_for_ensembles (float): + Time (in seconds) allocated to building the ensemble + optimize_metric (str): + Name of the metric to optimize the ensemble. + ensemble_nbest (int): + only consider the ensemble_nbest models to build the ensemble. + ensemble_size (int): + Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either 16, 32 or 64. + Returns: + EnsembleBuilderManager + """ + if self._logger is None: + raise ValueError("logger should be initialized to fit ensemble") + if self.dataset is None: + raise ValueError("ensemble can only be initialised after or during `search()`. 
" + "Please call the `search()` method of {}.".format(self.__class__.__name__)) + + self._logger.info("Starting ensemble") + ensemble_task_name = 'ensemble' + self._stopwatch.start_task(ensemble_task_name) + + # Use the current thread to start the ensemble builder process + # The function ensemble_builder_process will internally create a ensemble + # builder in the provide dask client + required_dataset_properties = {'task_type': self.task_type, + 'output_type': self.dataset.output_type} + proc_ensemble = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=time_left_for_ensembles, + backend=copy.deepcopy(self._backend), + dataset_name=str(self.dataset.dataset_name), + output_type=STRING_TO_OUTPUT_TYPES[self.dataset.output_type], + task_type=STRING_TO_TASK_TYPES[self.task_type], + metrics=[self._metric] if self._metric is not None else get_metrics( + dataset_properties=required_dataset_properties, names=[optimize_metric]), + opt_metric=optimize_metric, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + max_iterations=None, + read_at_most=sys.maxsize, + ensemble_memory_limit=self._memory_limit, + random_state=self.seed, + precision=precision, + logger_port=self._logger_port, + ) + self._stopwatch.stop_task(ensemble_task_name) + return proc_ensemble + def predict( self, X_test: np.ndarray, diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index c9681adb3..713c7d572 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -205,12 +205,12 @@ def temporary_directory(self) -> str: def _make_internals_directory(self) -> None: try: - os.makedirs(self.internals_directory) + os.makedirs(self.internals_directory, exist_ok=True) except Exception as e: if self._logger is not None: self._logger.debug("_make_internals_directory: %s" % e) try: - os.makedirs(self.get_runs_directory()) + os.makedirs(self.get_runs_directory(), exist_ok=True) except Exception as e: if self._logger is not None: self._logger.debug("_make_internals_directory: %s" % e) From 814477455752a762f86fccb7e955377416d8a92a Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 31 Aug 2021 11:54:11 +0200 Subject: [PATCH 05/54] add arlind fix for swa and se --- .../components/training/trainer/base_trainer.py | 10 ++++++++-- .../components/training/trainer/base_trainer_choice.py | 5 +++-- requirements.txt | 6 +++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 934c6c315..188504da3 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -320,8 +320,14 @@ def on_epoch_end(self, X: Dict[str, Any], epoch: int) -> bool: if self.use_snapshot_ensemble: assert self.model_snapshots is not None, "model snapshots container can't be " \ "none when snapshot ensembling is enabled" - model_copy = deepcopy(self.swa_model) if self.use_stochastic_weight_averaging \ - else deepcopy(self.model) + if epoch == self.budget_tracker.max_epochs: + if self.use_stochastic_weight_averaging: + model_copy = deepcopy(self.swa_model) + else: + model_copy = deepcopy(self.model) + else: + model_copy = deepcopy(self.model) + assert model_copy is not None model_copy.cpu() self.model_snapshots.append(model_copy) diff --git 
a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 27c64461e..2dcb8fe16 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -410,8 +410,9 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic # change model update_model_state_dict_from_swa(X['network'], self.choice.swa_model.state_dict()) if self.choice.use_snapshot_ensemble: - for model in self.choice.model_snapshots: - swa_utils.update_bn(X['train_data_loader'], model.double()) + swa_utils.update_bn(X['train_data_loader'], model.double()) + # we update only the last network which pertains to the stochastic weight averaging model + swa_utils.update_bn(X['train_data_loader'], self.choice.model_snapshots[-1].double()) # wrap up -- add score if not evaluating every epoch if not self.eval_valid_each_epoch(X): diff --git a/requirements.txt b/requirements.txt index c79104461..2195e64b4 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ pandas -torch -torchvision +torch<=1.8 +torchvision<=0.9 tensorboard scikit-learn>=0.24.0,<0.25.0 numpy -scipy +scipy==1.6.3 lockfile imgaug>=0.4.0 ConfigSpace>=0.4.14,<0.5 From 06ad6584746f5852dd4d31a3c7706ef44a218159 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 6 Sep 2021 18:26:43 +0200 Subject: [PATCH 06/54] fix bug in trainer choice fit --- .../pipeline/components/training/trainer/base_trainer_choice.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 2dcb8fe16..7119df201 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -410,7 +410,6 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic # change model update_model_state_dict_from_swa(X['network'], self.choice.swa_model.state_dict()) if self.choice.use_snapshot_ensemble: - swa_utils.update_bn(X['train_data_loader'], model.double()) # we update only the last network which pertains to the stochastic weight averaging model swa_utils.update_bn(X['train_data_loader'], self.choice.model_snapshots[-1].double()) From 1942279d1ec5aabfcc2d2127ddc3bd9dfd056293 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 8 Sep 2021 13:16:20 +0200 Subject: [PATCH 07/54] fix ensemble bug --- autoPyTorch/api/base_task.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index c2e220875..6ed0559e0 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -397,6 +397,7 @@ def _clean_logger(self) -> None: self.logging_server.join(timeout=5) self.logging_server.terminate() del self.stop_logging_server + self._logger = None def _create_dask_client(self) -> None: """ @@ -491,6 +492,23 @@ def _load_models(self) -> bool: return True + def _cleanup(self) -> None: + """ + Closes the different servers created during api search. 
+ Returns: + None + """ + if self._logger is not None: + self._logger.info("Closing the dask infrastructure") + self._close_dask_client() + self._logger.info("Finished closing the dask infrastructure") + + # Clean up the logger + self._logger.info("Starting to clean up the logger") + self._clean_logger() + else: + self._close_dask_client() + def _load_best_individual_model(self) -> SingleBest: """ In case of failure during ensemble building, @@ -923,6 +941,8 @@ def _search( self._stopwatch.stop_task(traditional_task_name) # ============> Starting ensemble + self.precision = precision + self.opt_metric = optimize_metric elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) time_left_for_ensembles = max(0, total_walltime_limit - elapsed_time) proc_ensemble = None @@ -1024,18 +1044,12 @@ def _search( pd.DataFrame(self.ensemble_performance_history).to_json( os.path.join(self._backend.internals_directory, 'ensemble_history.json')) - self._logger.info("Closing the dask infrastructure") - self._close_dask_client() - self._logger.info("Finished closing the dask infrastructure") - if load_models: self._logger.info("Loading models...") self._load_models() self._logger.info("Finished loading models...") - # Clean up the logger - self._logger.info("Starting to clean up the logger") - self._clean_logger() + self._cleanup() return self @@ -1506,7 +1520,7 @@ def predict( predictions = self.ensemble_.predict(all_predictions) - self._clean_logger() + self._cleanup() return predictions @@ -1543,10 +1557,7 @@ def __getstate__(self) -> Dict[str, Any]: return self.__dict__ def __del__(self) -> None: - # Clean up the logger - self._clean_logger() - - self._close_dask_client() + self._cleanup() # When a multiprocessing work is done, the # objects are deleted. We don't want to delete run areas From 2dc88500566ac67ad30018fcba00a4e7e62d1cb3 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 8 Sep 2021 16:48:58 +0200 Subject: [PATCH 08/54] Correct bug in cleanup --- autoPyTorch/api/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 6ed0559e0..19951a3a5 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -498,7 +498,7 @@ def _cleanup(self) -> None: Returns: None """ - if self._logger is not None: + if hasattr(self, '_logger') and self._logger is not None: self._logger.info("Closing the dask infrastructure") self._close_dask_client() self._logger.info("Finished closing the dask infrastructure") From 06d80d471898b41b59104a3bc92800f02f275b54 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 16 Sep 2021 14:44:10 +0200 Subject: [PATCH 09/54] Cleanup for removing time debug statements --- .../tabular_preprocessing/TabularColumnTransformer.py | 3 --- .../pipeline/components/training/trainer/base_trainer.py | 5 ----- .../components/training/trainer/base_trainer_choice.py | 3 --- .../pipeline/components/training/trainer/cutout_utils.py | 2 -- .../pipeline/components/training/trainer/mixup_utils.py | 2 -- 5 files changed, 15 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index 5fcf5cfb5..c7ca61e09 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -23,7 +23,6 @@ 
def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = N self.add_fit_requirements([ FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True), FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True)]) - self.fit_time = None def get_column_transformer(self) -> ColumnTransformer: """ @@ -48,7 +47,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": Returns: "TabularColumnTransformer": an instance of self """ - start_time = time.time() self.check_requirements(X, y) numerical_pipeline = 'passthrough' @@ -74,7 +72,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": X_train = X['backend'].load_datamanager().train_tensors[0] self.preprocessor.fit(X_train) - self.fit_time = time.time() - start_time return self diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 188504da3..6040f32e9 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -371,16 +371,12 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, outputs_data = list() targets_data = list() - batch_load_start_time = time.time() for step, (data, targets) in enumerate(train_loader): - self.data_loading_times.append(time.time() - batch_load_start_time) - batch_train_start = time.time() if self.budget_tracker.is_max_time_reached(): break loss, outputs = self.train_step(data, targets) - self.batch_fit_times.append(time.time() - batch_train_start) # save for metric evaluation outputs_data.append(outputs.detach().cpu()) targets_data.append(targets.detach().cpu()) @@ -395,7 +391,6 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, loss, epoch * len(train_loader) + step, ) - batch_load_start_time = time.time() if self.scheduler: if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__: diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index 7119df201..a344e92ce 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -77,7 +77,6 @@ def __init__(self, (torch.utils.data.DataLoader,), user_defined=False, dataset_property=False)] self.checkpoint_dir = None # type: Optional[str] - self.fit_time = None def get_fit_requirements(self) -> Optional[List[FitRequirement]]: return self._fit_requirements @@ -264,7 +263,6 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom Returns: A instance of self """ - start_time = time.time() # Make sure that the prerequisites are there self.check_requirements(X, y) @@ -287,7 +285,6 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice) if self.choice.use_snapshot_ensemble: X['network_snapshots'].extend(self.choice.model_snapshots) - self.fit_time = time.time() - start_time return self.choice def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoice': diff --git a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py index c58546a4c..c7feb2214 100644 --- 
a/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/cutout_utils.py @@ -53,8 +53,6 @@ def __init__(self, patch_ratio: float, self.lookahead_config = lookahead_config self.patch_ratio = patch_ratio self.cutout_prob = cutout_prob - self.batch_fit_times = [] - self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: diff --git a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py index b1cf37972..a2325b91c 100644 --- a/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py +++ b/autoPyTorch/pipeline/components/training/trainer/mixup_utils.py @@ -51,8 +51,6 @@ def __init__(self, alpha: float, f'{Lookahead.__name__}:la_alpha': 0.6} self.lookahead_config = lookahead_config self.alpha = alpha - self.batch_fit_times = [] - self.data_loading_times = [] def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 ) -> Callable: From d8b553aa2262825786440c26779e1f39b142d6e5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 20 Sep 2021 15:55:20 +0200 Subject: [PATCH 10/54] ablation for adversarial --- .../components/training/trainer/AdversarialTrainer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index c5a536dd0..36d586919 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -189,12 +189,17 @@ def get_hyperparameter_search_space( default_value=3), epsilon: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="epsilon", - value_range=(0.05, 0.2), - default_value=0.2), + value_range=(0.001, 0.15), + default_value=0.007, + log=True), ) -> ConfigurationSpace: cs = ConfigurationSpace() + epsilon = HyperparameterSearchSpace(hyperparameter="epsilon", + value_range=(0.007, 0.007), + default_value=0.007) add_hyperparameter(cs, epsilon, UniformFloatHyperparameter) + add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter) snapshot_ensemble_flag = False if any(use_snapshot_ensemble.value_range): From 34712b3b2d2c7a5ab810f795cf80abcf4090adb4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 21 Sep 2021 13:08:54 +0200 Subject: [PATCH 11/54] shuffle false in dataloader --- .../components/training/data_loader/base_data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 5b8e445ac..8dff86052 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=True, + shuffle=False, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', True), From 49f40dc2715f9e40ec38455e073c5116e2ee2b1a Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 21 Sep 2021 14:44:48 +0200 Subject: [PATCH 
12/54] drop last false in dataloader --- .../components/training/data_loader/base_data_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 8dff86052..7302ac6f5 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,10 +112,10 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=False, + shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), - drop_last=X.get('drop_last', True), + drop_last=X.get('drop_last', False), collate_fn=custom_collate_fn, ) From f4ea158a4c5611137a2522dfa4237b32b4ca1941 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 23 Sep 2021 15:39:25 +0200 Subject: [PATCH 13/54] fix bug for validation set, and cutout and cutmix --- autoPyTorch/api/base_task.py | 2 +- .../pipeline/components/training/trainer/RowCutMixTrainer.py | 2 +- .../pipeline/components/training/trainer/RowCutOutTrainer.py | 2 +- autoPyTorch/utils/backend.py | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 19951a3a5..14aa6ab83 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1244,7 +1244,7 @@ def fit_pipeline(self, dataset_requirements = get_dataset_requirements( info=self._get_required_dataset_properties(dataset)) dataset_properties = dataset.get_dataset_properties(dataset_requirements) - self._backend.save_datamanager(dataset) + self._backend.replace_datamanager(dataset) if self._logger is None: self._logger = self._get_logger(dataset.dataset_name) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 20d02c793..f1b606046 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -36,7 +36,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y, 'y_b': y[index], 'lam': 1} size = X.shape[1] - indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int32(size * lam)), + indices = torch.tensor(self.random_state.choice(range(size), max(1, np.int32(size * lam)), replace=False)) X[:, indices] = X[index, :][:, indices] diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index c09603523..d7bd23f4e 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -37,7 +37,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} size = X.shape[1] - indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)), + indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), replace=False) """if not isinstance(self.numerical_columns, typing.Iterable): diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 713c7d572..7a7399a9f 100644 --- 
a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -328,6 +328,11 @@ def load_datamanager(self) -> BaseDataset: with open(filepath, 'rb') as fh: return pickle.load(fh) + def replace_datamanager(self, datamanager: BaseDataset): + warnings.warn("Original dataset will be overwritten with the provided dataset") + os.remove(self._get_datamanager_pickle_filename()) + self.save_datamanager(datamanager=datamanager) + def get_runs_directory(self) -> str: return os.path.join(self.internals_directory, 'runs') From 209a4e82a362cb0edf15432bafb8a526f7c19b3e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Fri, 24 Sep 2021 12:36:54 +0200 Subject: [PATCH 14/54] shuffle = False --- .../components/training/data_loader/base_data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 7302ac6f5..bf0f23fa6 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=True, + shuffle=False, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', False), From 8fb0bc2c5c7b4f98e95785293b4da86e8a58f214 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Thu, 30 Sep 2021 16:06:57 +0200 Subject: [PATCH 15/54] Shake Shake updates (#287) * To test locally * fix bug in trainer choice fit * fix ensemble bug * Correct bug in cleanup * To test locally * Cleanup for removing time debug statements * ablation for adversarial * shuffle false in dataloader * drop last false in dataloader * fix bug for validation set, and cutout and cutmix * To test locally * shuffle = False * To test locally * updates to search space * updates to search space * update branch with search space * undo search space update * fix bug in shake shake flag * limit to shake-even * restrict to even even * Add even even and others for shake-drop also * fix bug in passing alpha beta method * restrict to only even even * fix silly bug: * remove imputer and ordinal encoder for categorical transformer in feature validator * Address comments from shuhei --- autoPyTorch/data/tabular_feature_validator.py | 30 +++++++++---------- autoPyTorch/pipeline/base_pipeline.py | 5 ++-- .../setup/network_backbone/ResNetBackbone.py | 24 +++++++++++---- .../network_backbone/ShapedResNetBackbone.py | 17 +++++++++-- .../setup/network_backbone/utils.py | 28 ++++++++++++++--- .../setup/optimizer/AdamWOptimizer.py | 4 +-- .../training/data_loader/base_data_loader.py | 2 +- .../example_custom_configuration_space.py | 11 ++++++- 8 files changed, 88 insertions(+), 33 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 16185817b..28d64a4b1 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -41,26 +41,26 @@ def get_tabular_preprocessors(): preprocessors['numerical'] = list() preprocessors['categorical'] = list() + # preprocessors['categorical'].append(SimpleImputer(strategy='constant', + # # Train data is numpy + # # as of this point, where + # 
# Ordinal Encoding is using + # # for categorical. Only + # # Numbers are allowed + # # fill_value='!missing!', + # fill_value=-1, + # copy=False)) + + # preprocessors['categorical'].append(OrdinalEncoder( + # handle_unknown='use_encoded_value', + # unknown_value=-1)) + preprocessors['categorical'].append(OneHotEncoder( categories='auto', sparse=False, handle_unknown='ignore')) - preprocessors['categorical'].append(SimpleImputer(strategy='constant', - # Train data is numpy - # as of this point, where - # Ordinal Encoding is using - # for categorical. Only - # Numbers are allowed - # fill_value='!missing!', - fill_value=-1, - copy=False)) - - preprocessors['categorical'].append(OrdinalEncoder( - handle_unknown='use_encoded_value', - unknown_value=-1)) - preprocessors['numerical'].append(SimpleImputer(strategy='median', - copy=False)) + copy=False)) preprocessors['numerical'].append(StandardScaler(with_mean=True, with_std=True, copy=False)) return preprocessors diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 842f63271..80d59a68f 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -451,12 +451,13 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], continue raise ValueError("Unknown hyperparameter for component {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, + "to be in {} got {}. choice is {}".format(node.__class__.__name__, component. get_hyperparameter_search_space( dataset_properties=self.dataset_properties). get_hyperparameter_names(), - split_hyperparameter[1])) + split_hyperparameter[1], + component.__name__)) else: if update.hyperparameter not in node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties): diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 069ca4679..10f509741 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,6 +139,14 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), + shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_alpha_beta_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_shake_drop", value_range=(True, False), default_value=True, @@ -180,9 +188,8 @@ def get_hyperparameter_search_space( if skip_connection_flag: - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -192,6 +199,10 @@ def get_hyperparameter_search_space( shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag or shake_drop_prob_flag: + method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + 
cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -327,11 +338,14 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: if self.config["multi_branch_choice"] == 'shake-shake': x1 = self.layers(x) x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(is_training=self.training, + is_cuda=x.is_cuda, + method=self.config['shake_alpha_beta_method']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, + method=self.config['shake_alpha_beta_method']) bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index e0867cdd3..12c6d4e74 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,6 +145,14 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), + shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_alpha_beta_method", + value_range=('shake-shake', + 'shake-even', + 'even-even', + 'M3'), + default_value='shake-shake', + ), max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace( hyperparameter="max_shake_drop_probability", value_range=(0, 1), @@ -188,9 +196,8 @@ def get_hyperparameter_search_space( # type: ignore[override] if skip_connection_flag: - shake_drop_prob_flag = False - if 'shake-drop' in multi_branch_choice.value_range: - shake_drop_prob_flag = True + shake_shake_flag = 'shake-shake' in multi_branch_choice.value_range + shake_drop_prob_flag = 'shake-drop' in multi_branch_choice.value_range mb_choice = get_hyperparameter(multi_branch_choice, CategoricalHyperparameter) cs.add_hyperparameter(mb_choice) @@ -200,5 +207,9 @@ def get_hyperparameter_search_space( # type: ignore[override] shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) + if shake_shake_flag or shake_drop_prob_flag: + method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) + cs.add_hyperparameter(method) + cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ef19beac8..9a1f9dd4e 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -92,15 +92,35 @@ def backward(ctx: typing.Any, shake_drop = ShakeDropFunction.apply -def shake_get_alpha_beta(is_training: bool, is_cuda: bool - ) -> typing.Tuple[torch.tensor, torch.tensor]: +def shake_get_alpha_beta( + is_training: bool, + is_cuda: bool, + method: str +) -> typing.Tuple[torch.tensor, torch.tensor]: + """ + The methods used in this function have been introduced in 
'ShakeShake Regularisation' + https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. + """ if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) + if method == 'even-even': + alpha = torch.FloatTensor([0.5]) + else: + alpha = torch.rand(1) + + if method == 'shake-shake': + beta = torch.rand(1) + elif method in ['shake-even', 'even-even']: + beta = torch.FloatTensor([0.5]) + elif method == 'M3': + beta = torch.FloatTensor( + [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + ) + else: + raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") if is_cuda: alpha = alpha.cuda() diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py index 4d11c3026..a415ff1c6 100644 --- a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py @@ -95,9 +95,9 @@ def get_hyperparameter_search_space( default_value=True, ), weight_decay: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="weight_decay", - value_range=(1E-7, 0.1), + value_range=(1E-5, 0.1), default_value=1E-4, - log=True), + log=False), ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index bf0f23fa6..7302ac6f5 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -112,7 +112,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: self.train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=min(self.batch_size, len(train_dataset)), - shuffle=False, + shuffle=True, num_workers=X.get('num_workers', 0), pin_memory=X.get('pin_memory', True), drop_last=X.get('drop_last', False), diff --git a/examples/tabular/40_advanced/example_custom_configuration_space.py b/examples/tabular/40_advanced/example_custom_configuration_space.py index 6a3764b94..b95ceeaa5 100644 --- a/examples/tabular/40_advanced/example_custom_configuration_space.py +++ b/examples/tabular/40_advanced/example_custom_configuration_space.py @@ -54,6 +54,15 @@ def get_search_space_updates(): hyperparameter='ResNetBackbone:dropout', value_range=[0, 0.5], default_value=0.2) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:multi_branch_choice', + value_range=['shake-shake'], + default_value='shake-shake') + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:shake_shake_method', + value_range=['M3'], + default_value='M3' + ) return updates @@ -74,7 +83,7 @@ def get_search_space_updates(): # ================================================== api = TabularClassificationTask( search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + include_components={'network_backbone': ['ResNetBackbone'], 'encoder': ['OneHotEncoder']} ) From 064e4a93eb0c116611140d0ca21e094d7a91d7a6 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 30 Sep 2021 18:36:42 +0200 Subject: [PATCH 16/54] fix issues with ensemble 
fitting post hoc --- autoPyTorch/api/base_task.py | 106 +++++++++++++++--- autoPyTorch/ensemble/singlebest_ensemble.py | 5 +- .../example_posthoc_ensemble_fit.py | 81 +++++++++++++ 3 files changed, 173 insertions(+), 19 deletions(-) create mode 100644 examples/tabular/40_advanced/example_posthoc_ensemble_fit.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 14aa6ab83..c90306f3a 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -887,7 +887,7 @@ def _search( # If no dask client was provided, we create one, so that we can # start a ensemble process in parallel to smbo optimize if ( - self._dask_client is None and (self.ensemble_size > 0 or self.n_jobs is not None and self.n_jobs > 1) + self._dask_client is None and (self.ensemble_size > 0 or self.n_jobs > 1) ): self._create_dask_client() else: @@ -916,14 +916,16 @@ def _search( ) # ============> Run dummy predictions - dummy_task_name = 'runDummy' - self._stopwatch.start_task(dummy_task_name) - self._do_dummy_prediction() - self._stopwatch.stop_task(dummy_task_name) + # We only want to run dummy predictions in case we want to build an ensemble + if self.ensemble_size > 0: + dummy_task_name = 'runDummy' + self._stopwatch.start_task(dummy_task_name) + self._do_dummy_prediction() + self._stopwatch.stop_task(dummy_task_name) # ============> Run traditional ml - - if enable_traditional_pipeline: + # We only want to run traditional predictions in case we want to build an ensemble + if enable_traditional_pipeline and self.ensemble_size > 0: if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...") else: @@ -1341,7 +1343,10 @@ def fit_ensemble( ensemble_nbest: int = 50, ensemble_size: int = 50, precision: int = 32, - load_models: bool = True + load_models: bool = True, + time_for_task: int = 100, + func_eval_time_limit_secs: Optional[int] = None, + enable_traditional_pipeline: bool = True ) -> 'BaseTask': """ Enables post-hoc fitting of the ensemble after the `search()` @@ -1357,6 +1362,30 @@ def fit_ensemble( Models are drawn with replacement. precision (int), (default=32): Numeric precision used when loading ensemble data. Can be either 16, 32 or 64. + enable_traditional_pipeline (bool), (default=True): + We fit traditional machine learning algorithms + (LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM) + prior building PyTorch Neural Networks. You can disable this + feature by turning this flag to False. All machine learning + algorithms that are fitted during search() are considered for + ensemble building. + load_models (bool), (default=True): Whether to load the + models after fitting AutoPyTorch. + time_for_task (int), (default=100): Time limit + in seconds for the search of appropriate models. + By increasing this value, autopytorch has a higher + chance of finding better models. + func_eval_time_limit_secs (int), (default=None): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. Set + this value high enough so that typical machine + learning algorithms can be fit on the training + data. + When set to None, this time will automatically be set to + total_walltime_limit // 2 to allow enough time to fit + at least 2 individual machine learning algorithms. + Set to np.inf in case no time limit is desired. 
Returns: self """ @@ -1375,8 +1404,55 @@ def fit_ensemble( else: self._is_dask_client_internally_created = False + ensemble_fit_task_name = 'EnsembleFit' + self._stopwatch.start_task(ensemble_fit_task_name) + if enable_traditional_pipeline: + if func_eval_time_limit_secs is None or func_eval_time_limit_secs > time_for_task: + self._logger.warning( + 'Time limit for a single run is higher than total time ' + 'limit. Capping the limit for a single run to the total ' + 'time given to Ensemble fit (%f)' % time_for_task + ) + func_eval_time_limit_secs = time_for_task + + # Make sure that at least 2 models are created for the ensemble process + num_models = time_for_task // func_eval_time_limit_secs + if num_models < 2: + func_eval_time_limit_secs = time_for_task // 2 + self._logger.warning( + "Capping the func_eval_time_limit_secs to {} to have " + "time for a least 2 models to ensemble.".format( + func_eval_time_limit_secs + ) + ) + # We only want to run dummy predictions in case we want to build an ensemble + dummy_task_name = 'runDummy' + self._stopwatch.start_task(dummy_task_name) + self._do_dummy_prediction() + self._stopwatch.stop_task(dummy_task_name) + + # ============> Run traditional ml + # We only want to run traditional predictions in case we want to build an ensemble + if enable_traditional_pipeline and self.ensemble_size > 0: + if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: + self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...") + else: + traditional_task_name = 'runTraditional' + self._stopwatch.start_task(traditional_task_name) + elapsed_time = self._stopwatch.wall_elapsed(ensemble_fit_task_name) + time_for_traditional = int( + time_for_task - elapsed_time + ) + self._do_traditional_prediction( + func_eval_time_limit_secs=func_eval_time_limit_secs, + time_left=time_for_traditional, + ) + self._stopwatch.stop_task(traditional_task_name) + + elapsed_time = self._stopwatch.wall_elapsed(ensemble_fit_task_name) + time_left_for_ensemble = int(time_for_task - elapsed_time) manager = self._init_ensemble_builder( - time_left_for_ensembles=self._time_for_task, + time_left_for_ensembles=time_left_for_ensemble, optimize_metric=self.opt_metric, precision=precision, ensemble_size=ensemble_size, @@ -1393,16 +1469,10 @@ def fit_ensemble( if load_models: self._load_models() - if self._logger is not None: - self._logger.info("Closing the dask infrastructure") - self._close_dask_client() - self._logger.info("Finished closing the dask infrastructure") - # Clean up the logger - self._logger.info("Starting to clean up the logger") - self._clean_logger() - else: - self._close_dask_client() + self._stopwatch.stop_task(ensemble_fit_task_name) + + self._cleanup() return self diff --git a/autoPyTorch/ensemble/singlebest_ensemble.py b/autoPyTorch/ensemble/singlebest_ensemble.py index c6fbaf576..6f82cbdf4 100644 --- a/autoPyTorch/ensemble/singlebest_ensemble.py +++ b/autoPyTorch/ensemble/singlebest_ensemble.py @@ -3,7 +3,7 @@ import numpy as np -from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory import RunHistory, StatusType from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble from autoPyTorch.pipeline.base_pipeline import BasePipeline @@ -49,6 +49,9 @@ def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]: for run_key in self.run_history.data.keys(): run_value = self.run_history.data[run_key] + if run_value.status == StatusType.CRASHED: + continue + score = self.metric._optimum - 
(self.metric._sign * run_value.cost) if (score > best_model_score and self.metric._sign > 0) \ diff --git a/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py b/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py new file mode 100644 index 000000000..b9383b2a6 --- /dev/null +++ b/examples/tabular/40_advanced/example_posthoc_ensemble_fit.py @@ -0,0 +1,81 @@ +""" +===================================================== +Tabular Classification with Post-Hoc Ensemble Fitting +===================================================== + +The following example shows how to fit a sample classification model +and create an ensemble post-hoc with AutoPyTorch +""" +import os +import tempfile as tmp +import warnings + +os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() +os.environ['OMP_NUM_THREADS'] = '1' +os.environ['OPENBLAS_NUM_THREADS'] = '1' +os.environ['MKL_NUM_THREADS'] = '1' + +warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action='ignore', category=FutureWarning) + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.api.tabular_classification import TabularClassificationTask + + +if __name__ == '__main__': + + ############################################################################ + # Data Loading + # ============ + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=42, + ) + + ############################################################################ + # Build and fit a classifier + # ========================== + api = TabularClassificationTask( + ensemble_size=0, + seed=42, + ) + + ############################################################################ + # Search for the best neural network + # ================================== + api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=250, + func_eval_time_limit_secs=50 + ) + + ############################################################################ + # Print the final performance of the incumbent neural network + # =========================================================== + print(api.run_history, api.trajectory) + y_pred = api.predict(X_test) + score = api.score(y_pred, y_test) + print(score) + + ############################################################################ + # Fit an ensemble with the neural networks fitted during the search + # ================================================================= + + api.fit_ensemble(ensemble_size=5, + # Set the enable_traditional_pipeline=True + # to also include traditional models + # in the ensemble + enable_traditional_pipeline=False) + # Print the final ensemble built by AutoPyTorch + y_pred = api.predict(X_test) + score = api.score(y_pred, y_test) + print(score) + print(api.show_models()) From ed48dab3537676a096124378d8fe8eb170b909e4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 30 Sep 2021 19:14:16 +0200 Subject: [PATCH 17/54] Address comments on the PR --- autoPyTorch/api/base_task.py | 20 ++++-- autoPyTorch/data/tabular_feature_validator.py | 69 +++++++++++-------- 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index c90306f3a..4d784b6c2 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -842,6 +842,8 @@ def _search( raise ValueError("Incompatible 
dataset entered for current task," "expected dataset to have task type :{} got " ":{}".format(self.task_type, dataset.task_type)) + if precision not in [16, 32, 64]: + raise ValueError("precision must be one of 16, 32, 64. Got {}".format(precision)) # Initialise information needed for the experiment experiment_task_name: str = 'runSearch' @@ -1340,19 +1342,24 @@ def fit_pipeline(self, def fit_ensemble( self, + optimize_metric: Optional[str] = None, + precision: Optional[int] = None, ensemble_nbest: int = 50, ensemble_size: int = 50, - precision: int = 32, load_models: bool = True, time_for_task: int = 100, func_eval_time_limit_secs: Optional[int] = None, - enable_traditional_pipeline: bool = True + enable_traditional_pipeline: bool = True, ) -> 'BaseTask': """ Enables post-hoc fitting of the ensemble after the `search()` method is finished. This method creates an ensemble using all the models stored on disk during the smbo run Args: + optimize_metric (str): name of the metric that is used to + evaluate a pipeline. if not specified, value passed to search will be used + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either 16, 32 or 64. ensemble_nbest (Optional[int]): only consider the ensemble_nbest models to build the ensemble. If None, uses the value stored in class attribute `ensemble_nbest`. @@ -1360,8 +1367,6 @@ def fit_ensemble( Number of models added to the ensemble built by Ensemble selection from libraries of models. Models are drawn with replacement. - precision (int), (default=32): Numeric precision used when loading - ensemble data. Can be either 16, 32 or 64. enable_traditional_pipeline (bool), (default=True): We fit traditional machine learning algorithms (LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM) @@ -1395,6 +1400,9 @@ def fit_ensemble( "Please call the `search()` method of {} prior to " "fit_ensemble().".format(self.__class__.__name__)) + if precision not in [16, 32, 64]: + raise ValueError("precision must be one of 16, 32, 64. 
Got {}".format(precision)) + if self._logger is None: self._logger = self._get_logger(self.dataset.dataset_name) @@ -1453,8 +1461,8 @@ def fit_ensemble( time_left_for_ensemble = int(time_for_task - elapsed_time) manager = self._init_ensemble_builder( time_left_for_ensembles=time_left_for_ensemble, - optimize_metric=self.opt_metric, - precision=precision, + optimize_metric=self.opt_metric if optimize_metric is None else optimize_metric, + precision=self.precision if precision is None else precision, ensemble_size=ensemble_size, ensemble_nbest=ensemble_nbest, ) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 28d64a4b1..e4ff2a179 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -1,5 +1,5 @@ import functools -import typing +from typing import Any, Dict, List, Optional, Tuple, Union, cast import numpy as np @@ -16,12 +16,31 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import OneHotEncoder from sklearn.impute import SimpleImputer -from sklearn.preprocessing import StandardScaler, OrdinalEncoder +from sklearn.preprocessing import StandardScaler from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES -def _create_column_transformer(preprocessors: typing.Dict, numerical_columns, categorical_columns): +def _create_column_transformer( + preprocessors: Dict[str, List[BaseEstimator]], + numerical_columns: List[str], + categorical_columns: List[str] +) -> ColumnTransformer: + """ + Given a dictionary of preprocessors, this function + creates a sklearn column transformer with appropriate + columns associated with their preprocessors. + Args: + preprocessors (Dict[str, List]): + Dictionary containing list of numerical and categorical preprocessors. + numerical_columns (List[int]): + List of names of numerical columns + categorical_columns (List[int]): + List of names of categorical columns + + Returns: + ColumnTransformer + """ numerical_pipeline = 'drop' categorical_pipeline = 'drop' if len(numerical_columns) > 0: @@ -36,25 +55,17 @@ def _create_column_transformer(preprocessors: typing.Dict, numerical_columns, ca ) -def get_tabular_preprocessors(): +def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: + """ + This function creates a Dictionary containing list + of numerical and categorical preprocessors + Returns: + + """ preprocessors = dict() preprocessors['numerical'] = list() preprocessors['categorical'] = list() - # preprocessors['categorical'].append(SimpleImputer(strategy='constant', - # # Train data is numpy - # # as of this point, where - # # Ordinal Encoding is using - # # for categorical. 
Only - # # Numbers are allowed - # # fill_value='!missing!', - # fill_value=-1, - # copy=False)) - - # preprocessors['categorical'].append(OrdinalEncoder( - # handle_unknown='use_encoded_value', - # unknown_value=-1)) - preprocessors['categorical'].append(OneHotEncoder( categories='auto', sparse=False, @@ -91,7 +102,7 @@ def _fit( X = self.numpy_array_to_pandas(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): - X = typing.cast(pd.DataFrame, X) + X = cast(pd.DataFrame, X) if not X.select_dtypes(include='object').empty: X = self.infer_objects(X) @@ -179,7 +190,7 @@ def transform( X = self.numpy_array_to_pandas(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): - X = typing.cast(pd.DataFrame, X) + X = cast(pd.DataFrame, X) # Also remove the object dtype for new data if not X.select_dtypes(include='object').empty: @@ -257,7 +268,7 @@ def _check_data( # Then for Pandas, we do not support Nan in categorical columns if hasattr(X, "iloc"): # If entered here, we have a pandas dataframe - X = typing.cast(pd.DataFrame, X) + X = cast(pd.DataFrame, X) # Handle objects if possible if not X.select_dtypes(include='object').empty: @@ -293,7 +304,7 @@ def _check_data( def _get_columns_info( self, X: pd.DataFrame, - ) -> typing.Tuple[typing.List[str], typing.List[str], typing.List[str]]: + ) -> Tuple[List[str], List[str], List[str]]: """ Return the columns to be encoded from a pandas dataframe @@ -365,8 +376,8 @@ def _get_columns_info( def list_to_dataframe( self, X_train: SUPPORTED_FEAT_TYPES, - X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None, - ) -> typing.Tuple[pd.DataFrame, typing.Optional[pd.DataFrame]]: + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """ Converts a list to a pandas DataFrame. In this process, column types are inferred. @@ -376,7 +387,7 @@ def list_to_dataframe( X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding - X_test (typing.Optional[SUPPORTED_FEAT_TYPES]): + X_test (Optional[SUPPORTED_FEAT_TYPES]): A hold out set of data used for checking Returns: pd.DataFrame: @@ -398,9 +409,9 @@ def list_to_dataframe( X_test = pd.DataFrame(data=X_test).infer_objects() return X_train, X_test + @staticmethod def numpy_array_to_pandas( - self, - X: np.ndarray, + X: np.ndarray, ) -> pd.DataFrame: """ Converts a numpy array to pandas for type inference @@ -462,7 +473,7 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> # To be on the safe side, map always to the same missing # value per column if not hasattr(self, 'dict_nancol_to_missing'): - self.dict_missing_value_per_col: typing.Dict[str, typing.Any] = {} + self.dict_missing_value_per_col: Dict[str, Any] = {} # First make sure that we do not alter the type of the column which cause: # TypeError: '<' not supported between instances of 'int' and 'str' @@ -478,7 +489,7 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> if can_cast_as_number: # In this case, we expect to have a number as category # it might be string, but its value represent a number - missing_value: typing.Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], + missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], str) else -1 else: missing_value = 'Missing!' 
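
[Illustrative sketch, not taken from any commit above; the column names and toy data are invented for this example.] The two validator patches above drop the ordinal-encoder path and build preprocessing from get_tabular_preprocessors() and _create_column_transformer(): the categorical branch keeps only the one-hot encoder (the constant-fill imputer and ordinal encoder are commented out), while the numerical branch applies median imputation followed by standard scaling. Assembled by hand with the same scikit-learn calls that appear in the diff, the construction looks roughly like this:

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Toy frame: one categorical and one numerical column (names invented).
X = pd.DataFrame({"color": ["red", "blue", "red"],
                  "age": [23.0, None, 41.0]})

# Categorical branch: only the one-hot encoder remains after the refactoring.
# `sparse=False` matches the 2021-era scikit-learn used in these patches
# (newer releases rename the keyword to `sparse_output`).
categorical_pipeline = make_pipeline(
    OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore'))

# Numerical branch: median imputation followed by standardisation.
numerical_pipeline = make_pipeline(
    SimpleImputer(strategy='median', copy=False),
    StandardScaler(with_mean=True, with_std=True, copy=False))

column_transformer = ColumnTransformer(
    [('categorical_pipeline', categorical_pipeline, ['color']),
     ('numerical_pipeline', numerical_pipeline, ['age'])],
    remainder='passthrough')

# Categorical features come out on the left, numerical on the right.
print(column_transformer.fit_transform(X))

Because the categorical pipeline is listed first in the ColumnTransformer, the transformed array places the one-hot columns before the scaled numerical ones; this is exactly why the comparator in TabularFeatureValidator._fit sorts feat_type so that 'categorical' precedes 'numerical'.
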
From 9cdfb64ec3fe9a138de2eb93db5cebead6b91a20 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Thu, 30 Sep 2021 19:55:28 +0200 Subject: [PATCH 18/54] Fix flake and mypy errors --- autoPyTorch/api/base_task.py | 2 +- autoPyTorch/api/tabular_classification.py | 1 + autoPyTorch/api/tabular_regression.py | 1 + autoPyTorch/data/tabular_feature_validator.py | 44 +++++++++---------- autoPyTorch/datasets/base_dataset.py | 10 ++++- autoPyTorch/pipeline/base_pipeline.py | 15 ++++--- .../TabularColumnTransformer.py | 6 +-- .../setup/network_backbone/utils.py | 2 +- .../base_network_embedding.py | 44 +++++++++---------- .../training/trainer/RowCutOutTrainer.py | 7 +-- .../training/trainer/base_trainer.py | 2 - .../pipeline/tabular_classification.py | 8 +--- autoPyTorch/utils/backend.py | 2 +- 13 files changed, 72 insertions(+), 72 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 4d784b6c2..27b7cbbb1 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1348,7 +1348,7 @@ def fit_ensemble( ensemble_size: int = 50, load_models: bool = True, time_for_task: int = 100, - func_eval_time_limit_secs: Optional[int] = None, + func_eval_time_limit_secs: int = 50, enable_traditional_pipeline: bool = True, ) -> 'BaseTask': """ diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 1a73d8625..ae2d53ef9 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -275,6 +275,7 @@ def search( y_test=y_test, dataset_name=dataset_name) + assert self.dataset is not None, "Something went wrong, expected dataset to be initialised" return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index e7fb919bd..0236d861f 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -261,6 +261,7 @@ def search( y_test=y_test, dataset_name=dataset_name) + assert self.dataset is not None, "Something went wrong, expected dataset to be initialised" return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index e4ff2a179..698e92438 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -9,35 +9,32 @@ import scipy.sparse import sklearn.utils - from sklearn.base import BaseEstimator from sklearn.compose import ColumnTransformer from sklearn.exceptions import NotFittedError -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder from sklearn.impute import SimpleImputer -from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES def _create_column_transformer( - preprocessors: Dict[str, List[BaseEstimator]], - numerical_columns: List[str], - categorical_columns: List[str] + preprocessors: Dict[str, List[BaseEstimator]], + numerical_columns: List[str], + categorical_columns: List[str] ) -> ColumnTransformer: """ - Given a dictionary of preprocessors, this function - creates a sklearn column transformer with appropriate - columns associated with their preprocessors. 
+ Given a dictionary of preprocessors, this function + creates a sklearn column transformer with appropriate + columns associated with their preprocessors. Args: - preprocessors (Dict[str, List]): + preprocessors (Dict[str, List]): Dictionary containing list of numerical and categorical preprocessors. numerical_columns (List[int]): List of names of numerical columns categorical_columns (List[int]): List of names of categorical columns - Returns: ColumnTransformer """ @@ -57,12 +54,12 @@ def _create_column_transformer( def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: """ - This function creates a Dictionary containing list + This function creates a Dictionary containing list of numerical and categorical preprocessors Returns: - + Dict[str, List[BaseEstimator]] """ - preprocessors = dict() + preprocessors: Dict[str, List[BaseEstimator]] = dict() preprocessors['numerical'] = list() preprocessors['categorical'] = list() @@ -144,12 +141,12 @@ def comparator(cmp1: str, cmp2: str) -> int: ) if len(categorical_columns) > 0: - print(self.column_transformer.named_transformers_['categorical_pipeline'].named_steps) self.categories = [ # We fit an ordinal encoder, where all categorical # columns are shifted to the left list(range(len(cat))) - for cat in self.column_transformer.named_transformers_['categorical_pipeline'].named_steps['onehotencoder'].categories_ + for cat in self.column_transformer.named_transformers_[ + 'categorical_pipeline'].named_steps['onehotencoder'].categories_ ] for i, type_ in enumerate(self.feat_type): @@ -284,7 +281,7 @@ def _check_data( raise ValueError("Changing the column order of the features after fit() is " "not supported. Fit() method was called with " "{} whereas the new features have {} as type".format(self.column_order, - column_order,) + column_order, ) ) else: self.column_order = column_order @@ -411,7 +408,7 @@ def list_to_dataframe( @staticmethod def numpy_array_to_pandas( - X: np.ndarray, + X: np.ndarray, ) -> pd.DataFrame: """ Converts a numpy array to pandas for type inference @@ -457,7 +454,9 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") return X - def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> pd.DataFrame: + def impute_nan_in_categories(self, + X: pd.DataFrame + ) -> pd.DataFrame: """ impute missing values before encoding, remove once sklearn natively supports @@ -489,8 +488,7 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> if can_cast_as_number: # In this case, we expect to have a number as category # it might be string, but its value represent a number - missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], - str) else -1 + missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], str) else -1 else: missing_value = 'Missing!' 
@@ -509,4 +507,4 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> X[column].cat.add_categories([self.dict_missing_value_per_col[column]], inplace=True) X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True) - return X \ No newline at end of file + return X diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index 8cb951977..cf67e1a95 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -330,13 +330,19 @@ def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) to provide training data to fit a pipeline Args: - split (int): The desired subset of the dataset to split and use + split_id (int): which split id to get from the splits + train (bool): whether the train or valid transforms are to be applied + subset (int, default=0): 0 is for train_indices, 1 is for valid_indices Returns: + Dataset: the reduced dataset to be used for testing """ # Subset creates a dataset. Splits is a (train_indices, test_indices) tuple - return TransformSubset(self, self.splits[split_id][subset], train=train) + assert split_id <= len(self.splits), "Expected split id to be less than length of splits" + indices = self.splits[split_id][subset] + assert indices is not None, "Trying to get subset when it does not exist" + return TransformSubset(self, indices, train=train) def replace_data(self, X_train: BaseDatasetInputType, X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset': diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 80d59a68f..4697345f4 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -451,13 +451,14 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], continue raise ValueError("Unknown hyperparameter for component {}. " "Expected update hyperparameter " - "to be in {} got {}. choice is {}".format(node.__class__.__name__, - component. - get_hyperparameter_search_space( - dataset_properties=self.dataset_properties). - get_hyperparameter_names(), - split_hyperparameter[1], - component.__name__)) + "to be in {} got {}." 
+ " component is {}".format(node.__class__.__name__, + component.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties + ).get_hyperparameter_names(), + split_hyperparameter[1], + component.__name__) + ) else: if update.hyperparameter not in node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties): diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index c7ca61e09..e513b8729 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -3,14 +3,14 @@ import numpy as np from sklearn.compose import ColumnTransformer -from sklearn.pipeline import make_pipeline -import time +# from sklearn.pipeline import make_pipeline + import torch from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( autoPyTorchTabularPreprocessingComponent ) -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers +# from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers from autoPyTorch.utils.common import FitRequirement, subsampler diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 9a1f9dd4e..d10d15dca 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -117,7 +117,7 @@ def shake_get_alpha_beta( beta = torch.FloatTensor([0.5]) elif method == 'M3': beta = torch.FloatTensor( - [torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha] + [torch.rand(1) * (0.5 - alpha) * alpha if alpha < 0.5 else torch.rand(1) * (alpha - 0.5) * alpha] ) else: raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py index 42cbc62bb..18028cddd 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py @@ -1,5 +1,5 @@ -import copy -from typing import Any, Dict, Optional, Tuple +# import copy +from typing import Any, Dict, Optional # , Tuple import numpy as np @@ -30,23 +30,23 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module: raise NotImplementedError - - def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]: - # Feature preprocessors can alter numerical columns - # if len(X['dataset_properties']['numerical_columns']) == 0: - # num_numerical_columns = 0 - # else: - # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) - # - # numerical_column_transformer = X['tabular_transformer'].preprocessor. 
\ - # named_transformers_['numerical_pipeline'] - # num_numerical_columns = numerical_column_transformer.transform( - # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] - # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), - # dtype=int) - # categories = X['dataset_properties']['categories'] - # - # for i, category in enumerate(categories): - # num_input_features[num_numerical_columns + i, ] = len(category) - # return num_numerical_columns, num_input_features - return None, None \ No newline at end of file + # + # def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]: + # # Feature preprocessors can alter numerical columns + # # if len(X['dataset_properties']['numerical_columns']) == 0: + # # num_numerical_columns = 0 + # # else: + # # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) + # # + # # numerical_column_transformer = X['tabular_transformer'].preprocessor. \ + # # named_transformers_['numerical_pipeline'] + # # num_numerical_columns = numerical_column_transformer.transform( + # # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] + # # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), + # # dtype=int) + # # categories = X['dataset_properties']['categories'] + # # + # # for i, category in enumerate(categories): + # # num_input_features[num_numerical_columns + i, ] = len(category) + # # return num_numerical_columns, num_input_features + # return None, None diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index d7bd23f4e..e04728f4b 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -2,7 +2,7 @@ import numpy as np -import torch +# import torch from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent from autoPyTorch.pipeline.components.training.trainer.cutout_utils import CutOut @@ -40,14 +40,15 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), replace=False) - """if not isinstance(self.numerical_columns, typing.Iterable): + """ + if not isinstance(self.numerical_columns, typing.Iterable): raise ValueError("{} requires numerical columns information of {}" "to prepare data got {}.".format(self.__class__.__name__, typing.Iterable, self.numerical_columns)) numerical_indices = torch.tensor(self.numerical_columns) categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns]) - + # We use an ordinal encoder on the categorical columns of tabular data # -1 is the conceptual equivalent to 0 in a image, that does not # have color as a feature and hence the network has to learn to deal diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 6040f32e9..85ba39c04 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -221,8 +221,6 @@ def __init__(self, weighted_loss: int = 0, self.add_fit_requirements([ FitRequirement("is_cyclic_scheduler", (bool,), user_defined=False, dataset_property=False), ]) - self.batch_fit_times = [] - 
self.data_loading_times = [] def prepare( self, diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index d19fc7215..c1901eb26 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -7,7 +7,6 @@ import numpy as np -import sklearn.preprocessing from sklearn.base import ClassifierMixin import torch @@ -91,7 +90,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) if isinstance(self.dataset_properties['output_shape'], int): - return pred + return pred else: all_proba = [] @@ -140,11 +139,6 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.n pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) y[batch_from:batch_to] = pred_prob.astype(np.float32) - # Neural networks might not be fit to produce a [0-1] output - # For instance, after small number of epochs. - # y = np.clip(y, 0, 1) - # y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') - return y def _get_hyperparameter_search_space(self, diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 7a7399a9f..667e6abd9 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -328,7 +328,7 @@ def load_datamanager(self) -> BaseDataset: with open(filepath, 'rb') as fh: return pickle.load(fh) - def replace_datamanager(self, datamanager: BaseDataset): + def replace_datamanager(self, datamanager: BaseDataset) -> None: warnings.warn("Original dataset will be overwritten with the provided dataset") os.remove(self._get_datamanager_pickle_filename()) self.save_datamanager(datamanager=datamanager) From 6bd43004d8f2c0ef555690c4155f826264174d03 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 12:19:25 +0200 Subject: [PATCH 19/54] Address comments from PR #286 --- autoPyTorch/datasets/base_dataset.py | 6 ++-- autoPyTorch/pipeline/base_pipeline.py | 30 +++++++++---------- .../training/data_loader/base_data_loader.py | 2 +- autoPyTorch/utils/backend.py | 12 +++++++- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index cf67e1a95..c1b09d4a9 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -322,7 +322,7 @@ def create_holdout_val_split( self.random_state, val_share, self._get_indices(), **kwargs) return train, val - def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) -> Dataset: + def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: """ The above split methods employ the Subset to internally subsample the whole dataset. @@ -331,8 +331,7 @@ def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) Args: split_id (int): which split id to get from the splits - train (bool): whether the train or valid transforms are to be applied - subset (int, default=0): 0 is for train_indices, 1 is for valid_indices + train (bool): whether the dataset is required for training or evaluating. Returns: @@ -340,6 +339,7 @@ def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0) """ # Subset creates a dataset. 
Splits is a (train_indices, test_indices) tuple assert split_id <= len(self.splits), "Expected split id to be less than length of splits" + subset = int(not train) indices = self.splits[split_id][subset] assert indices is not None, "Trying to get subset when it does not exist" return TransformSubset(self, indices, train=train) diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 4697345f4..205da414a 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -21,8 +21,9 @@ get_match_array ) from autoPyTorch.utils.common import FitRequirement -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates - +from autoPyTorch.utils.hyperparameter_search_space_update import ( + HyperparameterSearchSpaceUpdates +) class BasePipeline(Pipeline): """Base class for all pipeline objects. @@ -425,7 +426,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], if choice in exclude[update.node_name]: raise ValueError("Found {} in exclude".format(choice)) if choice not in components.keys(): - raise ValueError("Unknown hyperparameter for choice {}. " + raise ValueError("Unknown component choice for node {}. " "Expected update hyperparameter " "to be in {} got {}".format(node.__class__.__name__, components.keys(), choice)) @@ -433,8 +434,8 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], # needs to be updated is in components of the # choice module elif split_hyperparameter[0] not in components.keys(): - raise ValueError("Unknown hyperparameter for choice {}. " - "Expected update hyperparameter " + raise ValueError("Unknown component choice for node {}. " + "Expected update component " "to be in {} got {}".format(node.__class__.__name__, components.keys(), split_hyperparameter[0])) else: @@ -449,15 +450,14 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], component.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue - raise ValueError("Unknown hyperparameter for component {}. " - "Expected update hyperparameter " - "to be in {} got {}." - " component is {}".format(node.__class__.__name__, - component.get_hyperparameter_search_space( - dataset_properties=self.dataset_properties - ).get_hyperparameter_names(), - split_hyperparameter[1], - component.__name__) + raise ValueError("Unknown hyperparameter for component {} of node {}. Expected update hyperparameter " + "to be in {} got {}.".format(component.__name__, + node.__class__.__name__, + component.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties + ).get_hyperparameter_names(), + split_hyperparameter[1] + ) ) else: if update.hyperparameter not in node.get_hyperparameter_search_space( @@ -466,7 +466,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue - raise ValueError("Unknown hyperparameter for component {}. " + raise ValueError("Unknown hyperparameter for node {}. " "Expected update hyperparameter " "to be in {} got {}".format(node.__class__.__name__, node. 
diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py index 7302ac6f5..15d568002 100644 --- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -120,7 +120,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: ) if X['val_indices'] is not None: - val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False, subset=1) + val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id'], train=False) self.val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=min(self.batch_size, len(val_dataset)), diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 667e6abd9..8baba0367 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -17,6 +17,7 @@ from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger +from torch.utils import data __all__ = [ 'Backend' @@ -329,8 +330,17 @@ def load_datamanager(self) -> BaseDataset: return pickle.load(fh) def replace_datamanager(self, datamanager: BaseDataset) -> None: + """ + This function is called to replace the old datamanager with a datamanager + in case it is required. + + Args: + datamanager (BaseDataset): the new datamanager to replace the old. + """ warnings.warn("Original dataset will be overwritten with the provided dataset") - os.remove(self._get_datamanager_pickle_filename()) + datamanager_pickle_file = self._get_datamanager_pickle_filename() + if os.path.exists(datamanager_pickle_file): + os.remove(datamanager_pickle_file) self.save_datamanager(datamanager=datamanager) def get_runs_directory(self) -> str: From 9c0c47b2af7226be2f2e910b271a51fe98e97089 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 12:22:23 +0200 Subject: [PATCH 20/54] fix bug in embedding --- .../base_network_embedding.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py index 18028cddd..d516c4e84 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py @@ -1,5 +1,5 @@ # import copy -from typing import Any, Dict, Optional # , Tuple +from typing import Any, Dict, Optional, Tuple import numpy as np @@ -30,23 +30,23 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module: raise NotImplementedError - # - # def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]: - # # Feature preprocessors can alter numerical columns - # # if len(X['dataset_properties']['numerical_columns']) == 0: - # # num_numerical_columns = 0 - # # else: - # # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) - # # - # # numerical_column_transformer = X['tabular_transformer'].preprocessor. 
\ - # # named_transformers_['numerical_pipeline'] - # # num_numerical_columns = numerical_column_transformer.transform( - # # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] - # # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), - # # dtype=int) - # # categories = X['dataset_properties']['categories'] - # # - # # for i, category in enumerate(categories): - # # num_input_features[num_numerical_columns + i, ] = len(category) - # # return num_numerical_columns, num_input_features - # return None, None + + def _get_args(self, X: Dict[str, Any]) -> Tuple[None, None]: # Tuple[int, np.ndarray]: + # Feature preprocessors can alter numerical columns + # if len(X['dataset_properties']['numerical_columns']) == 0: + # num_numerical_columns = 0 + # else: + # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2]) + # + # numerical_column_transformer = X['tabular_transformer'].preprocessor. \ + # named_transformers_['numerical_pipeline'] + # num_numerical_columns = numerical_column_transformer.transform( + # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1] + # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])), + # dtype=int) + # categories = X['dataset_properties']['categories'] + # + # for i, category in enumerate(categories): + # num_input_features[num_numerical_columns + i, ] = len(category) + # return num_numerical_columns, num_input_features + return None, None From e83800451c48e1bb56aa2f8eb4b793f7d9cd5651 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:24:48 +0200 Subject: [PATCH 21/54] Update autoPyTorch/api/tabular_classification.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/api/tabular_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index ae2d53ef9..06d2aacb1 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -275,7 +275,8 @@ def search( y_test=y_test, dataset_name=dataset_name) - assert self.dataset is not None, "Something went wrong, expected dataset to be initialised" + if self.dataset is None: + raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) return self._search( dataset=self.dataset, optimize_metric=optimize_metric, From 893a15dca0bba22fa34a08c88c5a7fe78c4ca074 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:25:13 +0200 Subject: [PATCH 22/54] Update autoPyTorch/datasets/base_dataset.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/datasets/base_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index c1b09d4a9..db18f0315 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -338,7 +338,8 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: Dataset: the reduced dataset to be used for testing """ # Subset creates a dataset. 
Splits is a (train_indices, test_indices) tuple - assert split_id <= len(self.splits), "Expected split id to be less than length of splits" + if split_id >= len(self.splits): # old version: split_id > len(self.splits) + raise IndexError("split_id out of range, got split_id={} (>= num_splits={})".format(split_id, len(self.splits))) subset = int(not train) indices = self.splits[split_id][subset] assert indices is not None, "Trying to get subset when it does not exist" From ed0602c9888d04ad6ebc3f4b26fc8841f4ee0306 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:25:20 +0200 Subject: [PATCH 23/54] Update autoPyTorch/datasets/base_dataset.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/datasets/base_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index db18f0315..22163031c 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -342,7 +342,8 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: raise IndexError("split_id out of range, got split_id={} (>= num_splits={})".format(split_id, len(self.splits))) subset = int(not train) indices = self.splits[split_id][subset] - assert indices is not None, "Trying to get subset when it does not exist" + if indices is None: + raise ValueError("Specified fold (or subset) does not exist") return TransformSubset(self, indices, train=train) def replace_data(self, X_train: BaseDatasetInputType, From 224c69ea1cb42f160c41a6808fe655444c5d01a2 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:25:46 +0200 Subject: [PATCH 24/54] Update autoPyTorch/pipeline/components/training/trainer/base_trainer.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../pipeline/components/training/trainer/base_trainer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 85ba39c04..60bf7a69b 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -318,11 +318,9 @@ def on_epoch_end(self, X: Dict[str, Any], epoch: int) -> bool: if self.use_snapshot_ensemble: assert self.model_snapshots is not None, "model snapshots container can't be " \ "none when snapshot ensembling is enabled" - if epoch == self.budget_tracker.max_epochs: - if self.use_stochastic_weight_averaging: - model_copy = deepcopy(self.swa_model) - else: - model_copy = deepcopy(self.model) + is_last_epoch = (epoch == self.budget_tracker.max_epochs) + if is_last_epoch and self.use_stochastic_weight_averaging: + model_copy = deepcopy(self.swa_model) else: model_copy = deepcopy(self.model) From e61c1a31eea82519e16a2024eb808a3fb1633b05 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 12:28:37 +0200 Subject: [PATCH 25/54] Address comments from shuhei --- autoPyTorch/api/tabular_regression.py | 3 ++- autoPyTorch/pipeline/tabular_classification.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index 0236d861f..8742549af 100644 --- a/autoPyTorch/api/tabular_regression.py +++ 
b/autoPyTorch/api/tabular_regression.py @@ -261,7 +261,8 @@ def search( y_test=y_test, dataset_name=dataset_name) - assert self.dataset is not None, "Something went wrong, expected dataset to be initialised" + if self.dataset is None: + raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index c1901eb26..b059c783c 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -102,7 +102,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: proba_k /= normalizer all_proba.append(proba_k) - return all_proba + return np.ndarray(all_proba) def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: """predict_proba. From 3d47afa6d9afe9b5a5af27b10e971f07e2a45ec4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 15:50:39 +0200 Subject: [PATCH 26/54] adress comments from shuhei --- .../training/trainer/AdversarialTrainer.py | 2 +- .../training/trainer/RowCutMixTrainer.py | 15 ++++----- .../training/trainer/RowCutOutTrainer.py | 33 ++++--------------- 3 files changed, 14 insertions(+), 36 deletions(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py index 36d586919..709ee197f 100644 --- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py @@ -150,7 +150,7 @@ def get_properties(dataset_properties: Optional[Dict[str, Any]] = None 'shortname': 'AdversarialTrainer', 'name': 'AdversarialTrainer', 'handles_tabular': True, - 'handles_image': False, + 'handles_image': True, 'handles_time_series': False, } diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index f1b606046..53500741b 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -1,4 +1,4 @@ -import typing +from typing import Any, Dict, Optional, Tuple, Union import numpy as np @@ -11,7 +11,7 @@ class RowCutMixTrainer(MixUp, BaseTrainerComponent): def data_preparation(self, X: np.ndarray, y: np.ndarray, - ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]: """ Depending on the trainer choice, data fed to the network might be pre-processed on a different way. 
That is, in standard training we provide the data to the @@ -28,29 +28,28 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, """ beta = 1.0 lam = self.random_state.beta(beta, beta) - batch_size = X.size()[0] + batch_size, n_columns = np.shape(X) index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) r = self.random_state.rand(1) if beta <= 0 or r > self.alpha: return X, {'y_a': y, 'y_b': y[index], 'lam': 1} - size = X.shape[1] - indices = torch.tensor(self.random_state.choice(range(size), max(1, np.int32(size * lam)), + indices = torch.tensor(self.random_state.choice(range(batch_size), max(1, np.int32(n_columns * lam)), replace=False)) X[:, indices] = X[index, :][:, indices] # Adjust lam - lam = 1 - ((len(indices)) / (X.size()[1])) + lam = 1 - ((len(indices)) / (n_columns)) y_a, y_b = y, y[index] return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} @staticmethod - def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None - ) -> typing.Dict[str, typing.Union[str, bool]]: + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None + ) -> Dict[str, Union[str, bool]]: return { 'shortname': 'RowCutMixTrainer', 'name': 'MixUp Regularized with Cutoff Tabular Trainer', diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py index e04728f4b..fffc35476 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py @@ -1,19 +1,15 @@ -import typing +from typing import Any, Dict, Optional, Tuple, Union import numpy as np -# import torch - from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent from autoPyTorch.pipeline.components.training.trainer.cutout_utils import CutOut class RowCutOutTrainer(CutOut, BaseTrainerComponent): - NUMERICAL_VALUE = 0 - CATEGORICAL_VALUE = -1 def data_preparation(self, X: np.ndarray, y: np.ndarray, - ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]: """ Depending on the trainer choice, data fed to the network might be pre-processed on a different way. That is, in standard training we provide the data to the @@ -26,7 +22,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, Returns: np.ndarray: that processes data - typing.Dict[str, np.ndarray]: arguments to the criterion function + Dict[str, np.ndarray]: arguments to the criterion function """ r = self.random_state.rand(1) @@ -36,27 +32,10 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, lam = 1 return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} - size = X.shape[1] + size: int = np.shape(X)[1] indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)), replace=False) - """ - if not isinstance(self.numerical_columns, typing.Iterable): - raise ValueError("{} requires numerical columns information of {}" - "to prepare data got {}.".format(self.__class__.__name__, - typing.Iterable, - self.numerical_columns)) - numerical_indices = torch.tensor(self.numerical_columns) - categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns]) - - # We use an ordinal encoder on the categorical columns of tabular data - # -1 is the conceptual equivalent to 0 in a image, that does not - # have color as a feature and hence the network has to learn to deal - # without this data. 
For numerical columns we use 0 to cutout the features - # similar to the effect that setting 0 as a pixel value in an image. - X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE - X[:, numerical_indices.long()] = self.NUMERICAL_VALUE - """ X[:, indices] = 0 lam = 1 y_a = y @@ -64,8 +43,8 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} @staticmethod - def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None - ) -> typing.Dict[str, typing.Union[str, bool]]: + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None + ) -> Dict[str, Union[str, bool]]: return { 'shortname': 'RowCutOutTrainer', 'name': 'RowCutOutTrainer', From b41734617e014be1fff3cd28765ddcb274cf116c Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 15:56:12 +0200 Subject: [PATCH 27/54] fix flake and mypy --- autoPyTorch/api/tabular_classification.py | 2 +- autoPyTorch/datasets/base_dataset.py | 5 +++-- autoPyTorch/pipeline/base_pipeline.py | 4 +++- .../setup/network_embedding/base_network_embedding.py | 2 +- autoPyTorch/utils/backend.py | 5 ++--- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 06d2aacb1..7be504f6d 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -276,7 +276,7 @@ def search( dataset_name=dataset_name) if self.dataset is None: - raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) + raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__)) return self._search( dataset=self.dataset, optimize_metric=optimize_metric, diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index 22163031c..f041be5ec 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -339,11 +339,12 @@ def get_dataset_for_training(self, split_id: int, train: bool) -> Dataset: """ # Subset creates a dataset. Splits is a (train_indices, test_indices) tuple if split_id >= len(self.splits): # old version: split_id > len(self.splits) - raise IndexError("split_id out of range, got split_id={} (>= num_splits={})".format(split_id, len(self.splits))) + raise IndexError("split_id out of range, got split_id={}" + " (>= num_splits={})".format(split_id, len(self.splits))) subset = int(not train) indices = self.splits[split_id][subset] if indices is None: - raise ValueError("Specified fold (or subset) does not exist") + raise ValueError("Specified fold (or subset) does not exist") return TransformSubset(self, indices, train=train) def replace_data(self, X_train: BaseDatasetInputType, diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 205da414a..51db438e8 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -25,6 +25,7 @@ HyperparameterSearchSpaceUpdates ) + class BasePipeline(Pipeline): """Base class for all pipeline objects. Notes @@ -450,7 +451,8 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], component.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue - raise ValueError("Unknown hyperparameter for component {} of node {}. 
Expected update hyperparameter " + raise ValueError("Unknown hyperparameter for component {} of node {}." + " Expected update hyperparameter " "to be in {} got {}.".format(component.__name__, node.__class__.__name__, component.get_hyperparameter_search_space( diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py index d516c4e84..6feac0fba 100644 --- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py +++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py @@ -30,7 +30,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module: raise NotImplementedError - + def _get_args(self, X: Dict[str, Any]) -> Tuple[None, None]: # Tuple[int, np.ndarray]: # Feature preprocessors can alter numerical columns # if len(X['dataset_properties']['numerical_columns']) == 0: diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py index 8baba0367..85160af42 100644 --- a/autoPyTorch/utils/backend.py +++ b/autoPyTorch/utils/backend.py @@ -17,7 +17,6 @@ from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger -from torch.utils import data __all__ = [ 'Backend' @@ -331,8 +330,8 @@ def load_datamanager(self) -> BaseDataset: def replace_datamanager(self, datamanager: BaseDataset) -> None: """ - This function is called to replace the old datamanager with a datamanager - in case it is required. + This function is called to replace the old datamanager with a datamanager + in case it is required. Args: datamanager (BaseDataset): the new datamanager to replace the old. 
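Several patches in this series iterate on the row-wise CutMix and CutOut logic of the tabular trainers (pick a random subset of columns, swap or zero them, and re-derive the mixing coefficient). For orientation, a rough, self-contained NumPy sketch of that column-swap step follows; it is illustrative only, not the project's trainer API, and the name `row_cutmix` and its signature are made up here.

# Standalone sketch of row-wise CutMix for a tabular batch (illustration only).
import numpy as np

def row_cutmix(X, y, rng=None):
    if rng is None:
        rng = np.random.RandomState(0)
    batch_size, n_columns = X.shape
    lam = rng.beta(1.0, 1.0)                   # initial mixing coefficient
    shuffled = rng.permutation(batch_size)     # row pairing within the batch

    # swap a random subset of columns with the values from the shuffled rows
    n_cut = max(1, int(n_columns * lam))
    cut_cols = rng.choice(n_columns, n_cut, replace=False)
    X_mixed = X.copy()
    X_mixed[:, cut_cols] = X[shuffled][:, cut_cols]

    # exactly lam * n_columns columns cannot always be cut, so re-derive lam
    # from the fraction of columns that were left untouched
    lam = 1 - n_cut / n_columns
    return X_mixed, {'y_a': y, 'y_b': y[shuffled], 'lam': lam}

The returned y_a, y_b and lam are then typically combined in the loss as lam * loss(pred, y_a) + (1 - lam) * loss(pred, y_b). RowCutOutTrainer is the analogous variant that zeroes the selected columns instead of copying them from the shuffled rows.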
From 23541592261cd35234f838e1c89753d9ec621fdb Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 16:12:53 +0200 Subject: [PATCH 28/54] Update autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../pipeline/components/training/trainer/RowCutMixTrainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 53500741b..6b4f7b343 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -41,7 +41,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, X[:, indices] = X[index, :][:, indices] # Adjust lam - lam = 1 - ((len(indices)) / (n_columns)) + lam = 1 - (len(indices) / n_columns) y_a, y_b = y, y[index] From 7e59f4de8564a36b63f7cae0281e45836d50e3a0 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 16:13:19 +0200 Subject: [PATCH 29/54] Update autoPyTorch/pipeline/tabular_classification.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/pipeline/tabular_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index b059c783c..926d6308c 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -102,7 +102,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: proba_k /= normalizer all_proba.append(proba_k) - return np.ndarray(all_proba) + return np.array(all_proba) def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: """predict_proba. 
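A short note on the np.ndarray to np.array change in the hunk above: np.ndarray is the low-level array constructor and interprets its first positional argument as a shape, so it never builds an array from the given data, whereas np.array does. A quick illustration:

import numpy as np

np.array([2, 3])    # array([2, 3]): an array built from the data
np.ndarray([2, 3])  # an *uninitialised* array of shape (2, 3): the list is read as a shape

Returning np.array(all_proba) therefore converts the collected probability arrays into a single ndarray, which is what callers of _predict_proba expect.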
From 7ab5d267e576042328207e42d9f6e51497b641aa Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 16:48:30 +0200 Subject: [PATCH 30/54] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../pipeline/components/setup/network_backbone/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index d10d15dca..609c364aa 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -116,8 +116,10 @@ def shake_get_alpha_beta( elif method in ['shake-even', 'even-even']: beta = torch.FloatTensor([0.5]) elif method == 'M3': + # Table 4 in the paper `Shake-Shake regularization` + rnd = torch.rand(1) beta = torch.FloatTensor( - [torch.rand(1) * (0.5 - alpha) * alpha if alpha < 0.5 else torch.rand(1) * (alpha - 0.5) * alpha] + [rnd * (0.5 - alpha) + alpha if alpha < 0.5 else rnd * (alpha - 0.5) + 0.5] ) else: raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone") From 0032834f41f698eee6514e308f9b3d64d37dfe48 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 17:47:43 +0200 Subject: [PATCH 31/54] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/pipeline/components/setup/network_backbone/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 609c364aa..b0675eb14 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -100,6 +100,7 @@ def shake_get_alpha_beta( """ The methods used in this function have been introduced in 'ShakeShake Regularisation' https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. 
+ Currently, this function supports `even-even`, `shake-even` and `shake-shake` """ if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) From 90ce40c07c6aa2f9cdef98d0801f0b511366f3b5 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 17:47:59 +0200 Subject: [PATCH 32/54] Update autoPyTorch/pipeline/components/setup/network_backbone/utils.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/pipeline/components/setup/network_backbone/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index b0675eb14..fefe85e7a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -107,6 +107,7 @@ def shake_get_alpha_beta( return result if not is_cuda else (result[0].cuda(), result[1].cuda()) # TODO implement other update methods + # alpha is the weight ratio for the forward pass and beta is that for the backward pass if method == 'even-even': alpha = torch.FloatTensor([0.5]) else: From f51d2390ec23cf5c2b23b295689806496b411146 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 4 Oct 2021 17:49:16 +0200 Subject: [PATCH 33/54] Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/pipeline/base_pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 51db438e8..7d4fd17a9 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -429,7 +429,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], if choice not in components.keys(): raise ValueError("Unknown component choice for node {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, + "to be in {}, but got {}".format(node.__class__.__name__, components.keys(), choice)) # check if the component whose hyperparameter # needs to be updated is in components of the @@ -437,7 +437,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], elif split_hyperparameter[0] not in components.keys(): raise ValueError("Unknown component choice for node {}. " "Expected update component " - "to be in {} got {}".format(node.__class__.__name__, + "to be in {}, but got {}".format(node.__class__.__name__, components.keys(), split_hyperparameter[0])) else: # check if hyperparameter is in the search space of the component @@ -453,7 +453,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], continue raise ValueError("Unknown hyperparameter for component {} of node {}." " Expected update hyperparameter " - "to be in {} got {}.".format(component.__name__, + "to be in {}, but got {}.".format(component.__name__, node.__class__.__name__, component.get_hyperparameter_search_space( dataset_properties=self.dataset_properties @@ -470,7 +470,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], continue raise ValueError("Unknown hyperparameter for node {}. " "Expected update hyperparameter " - "to be in {} got {}".format(node.__class__.__name__, + "to be in {}, but got {}".format(node.__class__.__name__, node. 
get_hyperparameter_search_space( dataset_properties=self.dataset_properties). From 42e6b5ae631d0075b7a64e2dfd018ea0eb46ab55 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 4 Oct 2021 22:57:21 +0200 Subject: [PATCH 34/54] increase threads_per_worker --- autoPyTorch/api/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 27b7cbbb1..189919013 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -412,7 +412,7 @@ def _create_dask_client(self) -> None: dask.distributed.LocalCluster( n_workers=self.n_jobs, processes=True, - threads_per_worker=1, + threads_per_worker=2, # We use the temporal directory to save the # dask workers, because deleting workers # more time than deleting backend directories From f79a4fc895aa18403bdcdc89226893918d9278a4 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 5 Oct 2021 12:11:15 +0200 Subject: [PATCH 35/54] fix bug in rowcutmix --- .../pipeline/components/training/trainer/RowCutMixTrainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 6b4f7b343..9ab76ed59 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -35,7 +35,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, if beta <= 0 or r > self.alpha: return X, {'y_a': y, 'y_b': y[index], 'lam': 1} - indices = torch.tensor(self.random_state.choice(range(batch_size), max(1, np.int32(n_columns * lam)), + indices = torch.tensor(self.random_state.choice(range(n_columns), max(1, np.int32(n_columns * lam)), replace=False)) X[:, indices] = X[index, :][:, indices] From 6d9f99f3c71732e5a03be64505f32a63f2b5e6c9 Mon Sep 17 00:00:00 2001 From: Arlind Kadra Date: Fri, 8 Oct 2021 17:30:52 +0200 Subject: [PATCH 36/54] Enhancement for the tabular validator. 
(#291) * Initial try at an enhancement for the tabular validator * Adding a few type annotations * Fixing bugs in implementation * Adding wrongly deleted code part during rebase * Fix bug in _get_args * Fix bug in _get_args * Addressing Shuhei's comments * Address Shuhei's comments * Refactoring code * Refactoring code * Typos fix and additional comments * Replace nan in categoricals with simple imputer * Remove unused function * add comment * Update autoPyTorch/data/tabular_feature_validator.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Update autoPyTorch/data/tabular_feature_validator.py Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * Adding unit test for only nall columns in the tabular feature categorical evaluator * fix bug in remove all nan columns * Bug fix for making tests run by arlind * fix flake errors in feature validator * made typing code uniform * Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> * address comments from shuhei * address comments from shuhei (2) Co-authored-by: Ravin Kohli Co-authored-by: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/data/base_feature_validator.py | 73 +++-- autoPyTorch/data/base_target_validator.py | 40 +-- autoPyTorch/data/tabular_feature_validator.py | 269 ++++++++---------- test/test_data/test_feature_validator.py | 108 ++++++- 4 files changed, 290 insertions(+), 200 deletions(-) diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py index 0106a3aa8..9ed46d6e6 100644 --- a/autoPyTorch/data/base_feature_validator.py +++ b/autoPyTorch/data/base_feature_validator.py @@ -1,5 +1,5 @@ import logging -import typing +from typing import List, Optional, Set, Tuple, Union import numpy as np @@ -12,8 +12,8 @@ from autoPyTorch.utils.logging_ import PicklableClientLogger -SUPPORTED_FEAT_TYPES = typing.Union[ - typing.List, +SUPPORTED_FEAT_TYPES = Union[ + List, pd.DataFrame, np.ndarray, scipy.sparse.bsr_matrix, @@ -35,43 +35,44 @@ class BaseFeatureValidator(BaseEstimator): List of the column types found by this estimator during fit. data_type (str): Class name of the data type provided during fit. - encoder (typing.Optional[BaseEstimator]) + encoder (Optional[BaseEstimator]) Host a encoder object if the data requires transformation (for example, if provided a categorical column in a pandas DataFrame) - enc_columns (typing.List[str]) + enc_columns (List[str]) List of columns that were encoded. 
""" def __init__(self, - logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger - ]] = None, + logger: Optional[Union[PicklableClientLogger, logging.Logger + ] + ] = None, ) -> None: # Register types to detect unsupported data format changes - self.feat_type = None # type: typing.Optional[typing.List[str]] - self.data_type = None # type: typing.Optional[type] - self.dtypes = [] # type: typing.List[str] - self.column_order = [] # type: typing.List[str] + self.feat_type: Optional[List[str]] = None + self.data_type: Optional[type] = None + self.dtypes: List[str] = [] + self.column_order: List[str] = [] - self.encoder = None # type: typing.Optional[BaseEstimator] - self.enc_columns = [] # type: typing.List[str] + self.encoder: Optional[BaseEstimator] = None + self.enc_columns: List[str] = [] - self.logger: typing.Union[ + self.logger: Union[ PicklableClientLogger, logging.Logger ] = logger if logger is not None else logging.getLogger(__name__) # Required for dataset properties - self.num_features = None # type: typing.Optional[int] - self.categories = [] # type: typing.List[typing.List[int]] - self.categorical_columns: typing.List[int] = [] - self.numerical_columns: typing.List[int] = [] - # column identifiers may be integers or strings - self.null_columns: typing.Set[str] = set() + self.num_features: Optional[int] = None + self.categories: List[List[int]] = [] + self.categorical_columns: List[int] = [] + self.numerical_columns: List[int] = [] + + self.all_nan_columns: Optional[Set[Union[int, str]]] = None self._is_fitted = False def fit( self, X_train: SUPPORTED_FEAT_TYPES, - X_test: typing.Optional[SUPPORTED_FEAT_TYPES] = None, + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, ) -> BaseEstimator: """ Validates and fit a categorical encoder (if needed) to the features. @@ -82,7 +83,7 @@ def fit( X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding - X_test (typing.Optional[SUPPORTED_FEAT_TYPES]): + X_test (Optional[SUPPORTED_FEAT_TYPES]): A hold out set of data used for checking """ @@ -122,6 +123,7 @@ def _fit( self: The fitted base estimator """ + raise NotImplementedError() def _check_data( @@ -136,6 +138,7 @@ def _check_data( A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding """ + raise NotImplementedError() def transform( @@ -152,4 +155,30 @@ def transform( np.ndarray: The transformed array """ + + raise NotImplementedError() + + def list_to_dataframe( + self, + X_train: SUPPORTED_FEAT_TYPES, + X_test: Optional[SUPPORTED_FEAT_TYPES] = None, + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Converts a list to a pandas DataFrame. In this process, column types are inferred. 
+ + If test data is provided, we proactively match it to train data + + Arguments: + X_train (SUPPORTED_FEAT_TYPES): + A set of features that are going to be validated (type and dimensionality + checks) and a encoder fitted in the case the data needs encoding + X_test (Optional[SUPPORTED_FEAT_TYPES]): + A hold out set of data used for checking + Returns: + pd.DataFrame: + transformed train data from list to pandas DataFrame + pd.DataFrame: + transformed test data from list to pandas DataFrame + """ + raise NotImplementedError() diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py index dba9c19e3..0fb318476 100644 --- a/autoPyTorch/data/base_target_validator.py +++ b/autoPyTorch/data/base_target_validator.py @@ -1,5 +1,5 @@ import logging -import typing +from typing import List, Optional, Union, cast import numpy as np @@ -12,8 +12,8 @@ from autoPyTorch.utils.logging_ import PicklableClientLogger -SUPPORTED_TARGET_TYPES = typing.Union[ - typing.List, +SUPPORTED_TARGET_TYPES = Union[ + List, pd.Series, pd.DataFrame, np.ndarray, @@ -35,39 +35,39 @@ class BaseTargetValidator(BaseEstimator): is_classification (bool): A bool that indicates if the validator should operate in classification mode. During classification, the targets are encoded. - encoder (typing.Optional[BaseEstimator]): + encoder (Optional[BaseEstimator]): Host a encoder object if the data requires transformation (for example, if provided a categorical column in a pandas DataFrame) - enc_columns (typing.List[str]) + enc_columns (List[str]) List of columns that where encoded """ def __init__(self, is_classification: bool = False, - logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger + logger: Optional[Union[PicklableClientLogger, logging.Logger ]] = None, ) -> None: self.is_classification = is_classification - self.data_type = None # type: typing.Optional[type] + self.data_type: Optional[type] = None - self.encoder = None # type: typing.Optional[BaseEstimator] + self.encoder: Optional[BaseEstimator] = None - self.out_dimensionality = None # type: typing.Optional[int] - self.type_of_target = None # type: typing.Optional[str] + self.out_dimensionality: Optional[int] = None + self.type_of_target: Optional[str] = None - self.logger: typing.Union[ + self.logger: Union[ PicklableClientLogger, logging.Logger ] = logger if logger is not None else logging.getLogger(__name__) # Store the dtype for remapping to correct type - self.dtype = None # type: typing.Optional[type] + self.dtype: Optional[type] = None self._is_fitted = False def fit( self, y_train: SUPPORTED_TARGET_TYPES, - y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None, + y_test: Optional[SUPPORTED_TARGET_TYPES] = None, ) -> BaseEstimator: """ Validates and fit a categorical encoder (if needed) to the targets @@ -76,7 +76,7 @@ def fit( Arguments: y_train (SUPPORTED_TARGET_TYPES) A set of targets set aside for training - y_test (typing.Union[SUPPORTED_TARGET_TYPES]) + y_test (Union[SUPPORTED_TARGET_TYPES]) A hold out set of data used of the targets. It is also used to fit the categories of the encoder. 
""" @@ -95,8 +95,8 @@ def fit( np.shape(y_test) )) if isinstance(y_train, pd.DataFrame): - y_train = typing.cast(pd.DataFrame, y_train) - y_test = typing.cast(pd.DataFrame, y_test) + y_train = cast(pd.DataFrame, y_train) + y_test = cast(pd.DataFrame, y_test) if y_train.columns.tolist() != y_test.columns.tolist(): raise ValueError( "Train and test targets must both have the same columns, yet " @@ -127,21 +127,21 @@ def fit( def _fit( self, y_train: SUPPORTED_TARGET_TYPES, - y_test: typing.Optional[SUPPORTED_TARGET_TYPES] = None, + y_test: Optional[SUPPORTED_TARGET_TYPES] = None, ) -> BaseEstimator: """ Arguments: y_train (SUPPORTED_TARGET_TYPES) The labels of the current task. They are going to be encoded in case of classification - y_test (typing.Optional[SUPPORTED_TARGET_TYPES]) + y_test (Optional[SUPPORTED_TARGET_TYPES]) A holdout set of labels """ raise NotImplementedError() def transform( self, - y: typing.Union[SUPPORTED_TARGET_TYPES], + y: Union[SUPPORTED_TARGET_TYPES], ) -> np.ndarray: """ Arguments: @@ -162,7 +162,7 @@ def inverse_transform( Revert any encoding transformation done on a target array Arguments: - y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): + y (Union[np.ndarray, pd.DataFrame, pd.Series]): Target array to be transformed back to original form before encoding Returns: np.ndarray: diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 698e92438..3f939bc98 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -1,5 +1,5 @@ import functools -from typing import Any, Dict, List, Optional, Tuple, Union, cast +from typing import Dict, List, Optional, Tuple, cast import numpy as np @@ -22,7 +22,7 @@ def _create_column_transformer( preprocessors: Dict[str, List[BaseEstimator]], numerical_columns: List[str], - categorical_columns: List[str] + categorical_columns: List[str], ) -> ColumnTransformer: """ Given a dictionary of preprocessors, this function @@ -38,6 +38,7 @@ def _create_column_transformer( Returns: ColumnTransformer """ + numerical_pipeline = 'drop' categorical_pipeline = 'drop' if len(numerical_columns) > 0: @@ -48,7 +49,7 @@ def _create_column_transformer( return ColumnTransformer([ ('categorical_pipeline', categorical_pipeline, categorical_columns), ('numerical_pipeline', numerical_pipeline, numerical_columns)], - remainder='passthrough' + remainder='drop' ) @@ -60,21 +61,23 @@ def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: Dict[str, List[BaseEstimator]] """ preprocessors: Dict[str, List[BaseEstimator]] = dict() - preprocessors['numerical'] = list() - preprocessors['categorical'] = list() - preprocessors['categorical'].append(OneHotEncoder( - categories='auto', - sparse=False, - handle_unknown='ignore')) - preprocessors['numerical'].append(SimpleImputer(strategy='median', - copy=False)) - preprocessors['numerical'].append(StandardScaler(with_mean=True, with_std=True, copy=False)) + # Categorical Preprocessors + onehot_encoder = OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore') + categorical_imputer = SimpleImputer(strategy='constant', copy=False) + + # Numerical Preprocessors + numerical_imputer = SimpleImputer(strategy='median', copy=False) + standard_scaler = StandardScaler(with_mean=True, with_std=True, copy=False) + + preprocessors['categorical'] = [categorical_imputer, onehot_encoder] + preprocessors['numerical'] = [numerical_imputer, standard_scaler] return preprocessors class 
TabularFeatureValidator(BaseFeatureValidator): + def _fit( self, X: SUPPORTED_FEAT_TYPES, @@ -96,24 +99,29 @@ def _fit( # The final output of a validator is a numpy array. But pandas # gives us information about the column dtype if isinstance(X, np.ndarray): + X = self.numpy_array_to_pandas(X) + # Replace the data type from the previously saved type. + self.data_type = type(X) + # save all the information about the column order and data types + self._check_data(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): + X = cast(pd.DataFrame, X) - if not X.select_dtypes(include='object').empty: - X = self.infer_objects(X) + self.all_nan_columns = set([column for column in X.columns if X[column].isna().all()]) - self._check_data(X) categorical_columns, numerical_columns, feat_type = self._get_columns_info(X) self.enc_columns = categorical_columns - if len(categorical_columns) >= 0: - X = self.impute_nan_in_categories(X) + preprocessors = get_tabular_preprocessors() - self.column_transformer = _create_column_transformer(preprocessors=preprocessors, - numerical_columns=numerical_columns, - categorical_columns=categorical_columns) + self.column_transformer = _create_column_transformer( + preprocessors=preprocessors, + numerical_columns=numerical_columns, + categorical_columns=categorical_columns, + ) # Mypy redefinition assert self.column_transformer is not None @@ -140,15 +148,8 @@ def comparator(cmp1: str, cmp2: str) -> int: key=functools.cmp_to_key(comparator) ) - if len(categorical_columns) > 0: - self.categories = [ - # We fit an ordinal encoder, where all categorical - # columns are shifted to the left - list(range(len(cat))) - for cat in self.column_transformer.named_transformers_[ - 'categorical_pipeline'].named_steps['onehotencoder'].categories_ - ] - + # differently to categorical_columns and numerical_columns, + # this saves the index of the column. for i, type_ in enumerate(self.feat_type): if 'numerical' in type_: self.numerical_columns.append(i) @@ -156,7 +157,8 @@ def comparator(cmp1: str, cmp2: str) -> int: self.categorical_columns.append(i) # Lastly, store the number of features - self.num_features = np.shape(X)[1] + self.num_features = len(X.columns) + return self def transform( @@ -189,16 +191,19 @@ def transform( if hasattr(X, "iloc") and not scipy.sparse.issparse(X): X = cast(pd.DataFrame, X) - # Also remove the object dtype for new data - if not X.select_dtypes(include='object').empty: - X = self.infer_objects(X) - # Check the data here so we catch problems on new test data self._check_data(X) - # We also need to fillna on the transformation - # in case test data is provided - if len(self.categorical_columns) >= 0: - X = self.impute_nan_in_categories(X) + + # in case of test data being all none and train data + # having a value for a categorical column. 
+ # We need to convert the column in test data to + # object otherwise the test column is interpreted as float + if len(self.categorical_columns) > 0: + categorical_columns = self.column_transformer.transformers_[0][-1] + for column in categorical_columns: + if X[column].isna().all(): + X[column] = X[column].astype('object') + X = self.column_transformer.transform(X) # Sparse related transformations @@ -268,13 +273,13 @@ def _check_data( X = cast(pd.DataFrame, X) # Handle objects if possible - if not X.select_dtypes(include='object').empty: + exist_object_columns = has_object_columns(X.dtypes.values) + if exist_object_columns: X = self.infer_objects(X) # Define the column to be encoded here as the feature validator is fitted once # per estimator # enc_columns, _ = self._get_columns_to_encode(X) - column_order = [column for column in X.columns] if len(self.column_order) > 0: if self.column_order != column_order: @@ -288,13 +293,21 @@ def _check_data( dtypes = [dtype.name for dtype in X.dtypes] if len(self.dtypes) > 0: - if self.dtypes != dtypes: - raise ValueError("Changing the dtype of the features after fit() is " - "not supported. Fit() method was called with " - "{} whereas the new features have {} as type".format(self.dtypes, - dtypes, - ) - ) + dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)] + if any(dtypes_diff): + if self.all_nan_columns is not None and len(self.all_nan_columns) > 0: + if len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0: + # we expect the dtypes to only be different if the column belongs + # to all_nan_columns as these columns would be imputed. if there is + # a value in the test set for a column in all_nan_columns, pandas + # does not recognise the dtype of the test column properly + raise ValueError("Changing the dtype of the features after fit() is " + "not supported. The dtype of some columns are different " + "between training and test datasets. Fit() method was called with " + "{} whereas the new features have {} as type".format(self.dtypes, + dtypes, + ) + ) else: self.dtypes = dtypes @@ -310,8 +323,10 @@ def _get_columns_info( A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding Returns: - enc_columns (List[str]): - Columns to encode, if any + categorical_columns: (List[str]) + List of the names of categorical columns. + numerical_columns: (List[str]) + List of the names of numerical columns. feat_type: Type of each column numerical/categorical """ @@ -323,51 +338,44 @@ def _get_columns_info( # Make sure each column is a valid type for i, column in enumerate(X.columns): - if X[column].dtype.name in ['category', 'bool']: - + if self.all_nan_columns is not None and column in self.all_nan_columns: + continue + column_dtype = self.dtypes[i] + err_msg = "Valid types are `numerical`, `categorical` or `boolean`, " \ + "but input Column {} has an invalid type `{}`.".format(column, column_dtype) + if column_dtype in ['category', 'bool']: categorical_columns.append(column) feat_type.append('categorical') # Move away from np.issubdtype as it causes # TypeError: data type not understood in certain pandas types - elif not is_numeric_dtype(X[column]): - if X[column].dtype.name == 'object': - raise ValueError( - "Input Column {} has invalid type object. " - "Cast it to a valid dtype before using it in AutoPyTorch. " - "Valid types are numerical, categorical or boolean. 
" - "You can cast it to a valid dtype using " - "pandas.Series.astype ." - "If working with string objects, the following " - "tutorial illustrates how to work with text data: " - "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format( - # noqa: E501 - column, - ) - ) - elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype( - X[column].dtype - ): - raise ValueError( - "AutoPyTorch does not support time and/or date datatype as given " - "in column {}. Please convert the time information to a numerical value " - "first. One example on how to do this can be found on " - "https://stats.stackexchange.com/questions/311494/".format( - column, - ) - ) - else: - raise ValueError( - "Input Column {} has unsupported dtype {}. " - "Supported column types are categorical/bool/numerical dtypes. " - "Make sure your data is formatted in a correct way, " - "before feeding it to AutoPyTorch.".format( - column, - X[column].dtype.name, - ) - ) - else: + elif is_numeric_dtype(column_dtype): feat_type.append('numerical') numerical_columns.append(column) + elif column_dtype == 'object': + # TODO verify how would this happen when we always convert the object dtypes to category + raise ValueError( + "{} Cast it to a valid dtype before feeding it to AutoPyTorch. " + "You can cast it to a valid dtype using pandas.Series.astype." + "If you are working with string objects, the following " + "tutorial illustrates how to work with text data: " + "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format( + # noqa: E501 + err_msg, + ) + ) + elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(column_dtype): + raise ValueError( + "{} Convert the time information to a numerical value" + " before feeding it to AutoPyTorch. " + "One example of the conversion can be found on " + "https://stats.stackexchange.com/questions/311494/".format(err_msg) + ) + else: + raise ValueError( + "{} Make sure your data is formatted in a correct way" + "before feeding it to AutoPyTorch.".format(err_msg) + ) + return categorical_columns, numerical_columns, feat_type def list_to_dataframe( @@ -394,7 +402,7 @@ def list_to_dataframe( """ # If a list was provided, it will be converted to pandas - X_train = pd.DataFrame(data=X_train).infer_objects() + X_train = pd.DataFrame(data=X_train).convert_dtypes() self.logger.warning("The provided feature types to AutoPyTorch are of type list." "Features have been interpreted as: {}".format([(col, t) for col, t in zip(X_train.columns, X_train.dtypes)])) @@ -403,7 +411,8 @@ def list_to_dataframe( self.logger.warning("Train features are a list while the provided test data" "is {}. 
X_test will be casted as DataFrame.".format(type(X_test)) ) - X_test = pd.DataFrame(data=X_test).infer_objects() + X_test = pd.DataFrame(data=X_test).convert_dtypes() + return X_train, X_test @staticmethod @@ -446,65 +455,33 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: self.logger.warning(f"Tried to cast column {key} to {dtype} caused {e}") pass else: + # Calling for the first time to infer the categories X = X.infer_objects() - for column in X.columns: - if not is_numeric_dtype(X[column]): + for column, data_type in zip(X.columns, X.dtypes): + if not is_numeric_dtype(data_type): X[column] = X[column].astype('category') - self.object_dtype_mapping = {column: X[column].dtype for column in X.columns} + + # only numerical attributes and categories + self.object_dtype_mapping = {column: data_type for column, data_type in zip(X.columns, X.dtypes)} + self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}") + return X - def impute_nan_in_categories(self, - X: pd.DataFrame - ) -> pd.DataFrame: - """ - impute missing values before encoding, - remove once sklearn natively supports - it in ordinal encoding. Sklearn issue: - "https://github.com/scikit-learn/scikit-learn/issues/17123)" - Arguments: - X (pd.DataFrame): - data to be interpreted. - Returns: - pd.DataFrame - """ - # To be on the safe side, map always to the same missing - # value per column - if not hasattr(self, 'dict_nancol_to_missing'): - self.dict_missing_value_per_col: Dict[str, Any] = {} - - # First make sure that we do not alter the type of the column which cause: - # TypeError: '<' not supported between instances of 'int' and 'str' - # in the encoding - for column in self.enc_columns: - if X[column].isna().any(): - if column not in self.dict_missing_value_per_col: - try: - float(X[column].dropna().values[0]) - can_cast_as_number = True - except Exception: - can_cast_as_number = False - if can_cast_as_number: - # In this case, we expect to have a number as category - # it might be string, but its value represent a number - missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], str) else -1 - else: - missing_value = 'Missing!' - - # Make sure this missing value is not seen before - # Do this check for categorical columns - # else modify the value - if hasattr(X[column], 'cat'): - while missing_value in X[column].cat.categories: - if isinstance(missing_value, str): - missing_value += '0' - else: - missing_value += missing_value - self.dict_missing_value_per_col[column] = missing_value - - # Convert the frame in place - X[column].cat.add_categories([self.dict_missing_value_per_col[column]], - inplace=True) - X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True) - return X +def has_object_columns( + feature_types: pd.Series, +) -> bool: + """ + Indicate whether on a Series of dtypes for a Pandas DataFrame + there exists one or more object columns. + + Arguments: + feature_types (pd.Series): + The feature types for a DataFrame. + Returns: + bool: + True if the DataFrame dtypes contain an object column, False + otherwise. 
+ """ + return np.dtype('O') in feature_types diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index f9ba2855e..535023cd2 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -317,6 +317,93 @@ def test_featurevalidator_get_columns_to_encode(): assert feature_types == ['numerical', 'numerical', 'categorical', 'categorical'] +def test_featurevalidator_remove_nan_catcolumns(): + """ + Make sure categorical columns that have only nan values are removed. + """ + # First case, there exist null columns in the train set + # and the same columns are not all null for the test set. + validator = TabularFeatureValidator() + + df_train = pd.DataFrame( + [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan, 'C': 5}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + + validator.fit(df_train) + transformed_df_train = validator.transform(df_train) + transformed_df_test = validator.transform(df_test) + + assert np.array_equal(transformed_df_train, np.array([[0, 1], [1, 0], [0, 1]], dtype=float)) + assert np.array_equal(transformed_df_test, np.array([[1, 0], [1, 0], [0, 1]], dtype=float)) + + # Second case, there exist null columns in the training set and the same + # are null in the test set. + validator = TabularFeatureValidator() + + df_train = pd.DataFrame( + [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'C': np.nan}, + {'A': 1} + ], + dtype='category', + ) + + validator.fit(df_train) + transformed_df_train = validator.transform(df_train) + transformed_df_test = validator.transform(df_test) + + assert np.array_equal(transformed_df_train, np.array([[0, 1], [1, 0], [0, 1]], dtype=float)) + assert np.array_equal(transformed_df_test, np.array([[1, 0], [1, 0], [0, 1]], dtype=float)) + + # Third case, there exist no null columns in the training set and a + # few null columns exist in the test set. 
+ validator = TabularFeatureValidator() + + df_train = pd.DataFrame( + [ + {'A': 1, 'B': 1}, + {'A': 2, 'B': 2} + ], + dtype='category', + ) + df_test = pd.DataFrame( + [ + {'A': np.nan, 'B': np.nan}, + {'A': np.nan, 'B': np.nan} + ], + dtype='category', + ) + + validator.fit(df_train) + transformed_df_train = validator.transform(df_train) + transformed_df_test = validator.transform(df_test) + + assert np.array_equal(transformed_df_train, np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=float)) + assert np.array_equal(transformed_df_test, np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=float)) + + def test_features_unsupported_calls_are_raised(): """ Makes sure we raise a proper message to the user, @@ -550,15 +637,16 @@ def test_feature_validator_imbalanced_data(): validator.fit(X_train) train_feature_types = copy.deepcopy(validator.feat_type) - assert train_feature_types == ['numerical', 'numerical', 'numerical', 'numerical'] + assert train_feature_types == ['numerical'] # validator will throw an error if the column types are not the same transformed_X_test = validator.transform(X_test) transformed_X_test = pd.DataFrame(transformed_X_test) - null_columns = [] - for column in transformed_X_test.columns: - if transformed_X_test[column].isna().all(): - null_columns.append(column) - assert null_columns == [0, 2, 3] + assert sorted(validator.all_nan_columns) == sorted(['A', 'C', 'D']) + # as there are no categorical columns, we can make such an + # assertion. We only expect to drop the all nan columns + total_all_nan_columns = len(validator.all_nan_columns) + total_columns = len(validator.column_order) + assert total_columns - total_all_nan_columns == len(transformed_X_test.columns) # Columns with not all null values in the train split and # completely null on the test split. 
@@ -577,14 +665,10 @@ def test_feature_validator_imbalanced_data(): X_test = pd.DataFrame.from_dict(test_features) validator = TabularFeatureValidator() validator.fit(X_train) + train_feature_types = copy.deepcopy(validator.feat_type) assert train_feature_types == ['categorical', 'numerical', 'numerical'] transformed_X_test = validator.transform(X_test) transformed_X_test = pd.DataFrame(transformed_X_test) - null_columns = [] - for column in transformed_X_test.columns: - if transformed_X_test[column].isna().all(): - null_columns.append(column) - - assert null_columns == [1] + assert not len(validator.all_nan_columns) From 96614099ee598dd823bf6fcfc86c10358eaab0b3 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Mon, 11 Oct 2021 15:05:10 +0200 Subject: [PATCH 37/54] Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- autoPyTorch/data/tabular_feature_validator.py | 31 +++++++++---------- .../training/trainer/RowCutMixTrainer.py | 17 +++++----- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 3f939bc98..e5944da19 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -127,21 +127,18 @@ def _fit( assert self.column_transformer is not None self.column_transformer.fit(X) - # The column transformer reoders the feature types - we therefore need to change - # it as well - # This means columns are shifted to the right + # The column transformer reorders the feature types + # therefore, we need to change the order of columns as well + # This means categorical columns are shifted to the right def comparator(cmp1: str, cmp2: str) -> int: - if ( - cmp1 == 'categorical' and cmp2 == 'categorical' - or cmp1 == 'numerical' and cmp2 == 'numerical' - ): - return 0 - elif cmp1 == 'categorical' and cmp2 == 'numerical': - return -1 - elif cmp1 == 'numerical' and cmp2 == 'categorical': - return 1 - else: - raise ValueError((cmp1, cmp2)) + """ Order so that categorical columns come right and numerical columns come left """ + choices = ['categorical', 'numerical'] + if cmp1 not in choices or cmp2 not in choices: + raise ValueError('The comparator for the column order only accepts {}, ' + 'but got {} and {}'.format(choices, cmp1, cmp2)) + + idx1, idx2 = choices.index(cmp1), choices.index(cmp2) + return idx1 - idx2 self.feat_type = sorted( feat_type, @@ -353,7 +350,7 @@ def _get_columns_info( numerical_columns.append(column) elif column_dtype == 'object': # TODO verify how would this happen when we always convert the object dtypes to category - raise ValueError( + raise TypeError( "{} Cast it to a valid dtype before feeding it to AutoPyTorch. " "You can cast it to a valid dtype using pandas.Series.astype." "If you are working with string objects, the following " @@ -364,14 +361,14 @@ def _get_columns_info( ) ) elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(column_dtype): - raise ValueError( + raise TypeError( "{} Convert the time information to a numerical value" " before feeding it to AutoPyTorch. 
" "One example of the conversion can be found on " "https://stats.stackexchange.com/questions/311494/".format(err_msg) ) else: - raise ValueError( + raise TypeError( "{} Make sure your data is formatted in a correct way" "before feeding it to AutoPyTorch.".format(err_msg) ) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 9ab76ed59..409b07e9d 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -29,21 +29,24 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, beta = 1.0 lam = self.random_state.beta(beta, beta) batch_size, n_columns = np.shape(X) - index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) + # shuffled_indices: Shuffled version of torch.arange(batch_size) + shuffled_indices = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) r = self.random_state.rand(1) if beta <= 0 or r > self.alpha: - return X, {'y_a': y, 'y_b': y[index], 'lam': 1} + return X, {'y_a': y, 'y_b': y[shuffled_indices], 'lam': 1} - indices = torch.tensor(self.random_state.choice(range(n_columns), max(1, np.int32(n_columns * lam)), + cut_column_indices = torch.tensor(self.random_state.choice(range(n_columns), max(1, np.int32(n_columns * lam)), replace=False)) - X[:, indices] = X[index, :][:, indices] + # Replace the values in `cut_indices` columns with + # the values from `permed_indices` + X[:, cut_indices] = X[shuffled_indices, :][:, cut_column_indices] - # Adjust lam - lam = 1 - (len(indices) / n_columns) + # Since we cannot cut exactly `lam x 100 %` of rows, we need to adjust the `lam` + lam = 1 - (len(cut_column_indices) / n_columns) - y_a, y_b = y, y[index] + y_a, y_b = y, y[shuffled_indices] return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam} From 36cb3c4438fa07a4f60ea0515b38b5bc14c36953 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 11 Oct 2021 15:05:26 +0200 Subject: [PATCH 38/54] resolve code issues with new versions --- .../normalise/ImageNormalizer.py | 8 ++++---- .../normalise/NoNormalizer.py | 8 ++++---- .../setup/network_backbone/ResNetBackbone.py | 18 ++++++++++------- .../network_backbone/ShapedResNetBackbone.py | 14 ++++++++----- .../setup/network_backbone/utils.py | 20 +++++++++---------- autoPyTorch/utils/common.py | 2 +- requirements.txt | 8 ++++---- 7 files changed, 43 insertions(+), 35 deletions(-) diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py index 4327d6346..a3be8fa79 100644 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py @@ -2,7 +2,7 @@ import numpy as np -import torch.tensor +import torch from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer @@ -30,16 +30,16 @@ def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ImageNormalizer": self.std = X['dataset_properties']['std'] return self - def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: """ Makes the autoPyTorchPreprocessingComponent Callable. 
Calling the component calls the transform function of the underlying early_preprocessor and returns the transformed array. Args: - X (Union[np.ndarray, torch.tensor]): input data tensor + X (Union[np.ndarray, torch.Tensor]): input data tensor Returns: - Union[np.ndarray, torch.tensor]: Transformed data tensor + Union[np.ndarray, torch.Tensor]: Transformed data tensor """ X = (X - self.mean) / self.std return X diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py index 7aeb83a9c..b36a50f4e 100644 --- a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py @@ -2,7 +2,7 @@ import numpy as np -import torch.tensor +import torch from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import ( BaseNormalizer @@ -34,16 +34,16 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: X.update({'normalise': self}) return X - def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: """ Makes the autoPyTorchPreprocessingComponent Callable. Calling the component calls the transform function of the underlying early_preprocessor and returns the transformed array. Args: - X (Union[np.ndarray, torch.tensor]): input data tensor + X (Union[np.ndarray, torch.Tensor]): input data tensor Returns: - Union[np.ndarray, torch.tensor]: Transformed data tensor + Union[np.ndarray, torch.Tensor]: Transformed data tensor """ return X diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py index 10f509741..4a7893f94 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py @@ -139,8 +139,8 @@ def get_hyperparameter_search_space( value_range=(True, False), default_value=True, ), - shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_alpha_beta_method", + shake_shake_update_func: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_update_func", value_range=('shake-shake', 'shake-even', 'even-even', @@ -195,14 +195,18 @@ def get_hyperparameter_search_space( cs.add_hyperparameter(mb_choice) cs.add_condition(CS.EqualsCondition(mb_choice, use_sc, True)) + shake_shake_update_func_conditional: List[str] = list() if shake_drop_prob_flag: shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) - if shake_shake_flag or shake_drop_prob_flag: - method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) + shake_shake_update_func_conditional.append('shake-drop') + if shake_shake_flag: + shake_shake_update_func_conditional.append('shake-shake') + if len(shake_shake_update_func_conditional) > 0: + method = get_hyperparameter(shake_shake_update_func, CategoricalHyperparameter) cs.add_hyperparameter(method) - cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) + cs.add_condition(CS.InCondition(method, 
mb_choice, shake_shake_update_func_conditional)) # It is the upper bound of the nr of groups, # since the configuration will actually be sampled. @@ -340,12 +344,12 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: x2 = self.shake_shake_layers(x) alpha, beta = shake_get_alpha_beta(is_training=self.training, is_cuda=x.is_cuda, - method=self.config['shake_alpha_beta_method']) + method=self.config['shake_shake_update_func']) x = shake_shake(x1, x2, alpha, beta) elif self.config["multi_branch_choice"] == 'shake-drop': x = self.layers(x) alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda, - method=self.config['shake_alpha_beta_method']) + method=self.config['shake_shake_update_func']) bl = shake_drop_get_bl( self.block_index, 1 - self.config["max_shake_drop_probability"], diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py index 12c6d4e74..f9ad4e6e0 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py @@ -145,8 +145,8 @@ def get_hyperparameter_search_space( # type: ignore[override] 'stairs'), default_value='funnel', ), - shake_alpha_beta_method: HyperparameterSearchSpace = HyperparameterSearchSpace( - hyperparameter="shake_alpha_beta_method", + shake_shake_update_func: HyperparameterSearchSpace = HyperparameterSearchSpace( + hyperparameter="shake_shake_update_func", value_range=('shake-shake', 'shake-even', 'even-even', @@ -203,13 +203,17 @@ def get_hyperparameter_search_space( # type: ignore[override] cs.add_hyperparameter(mb_choice) cs.add_condition(CS.EqualsCondition(mb_choice, use_sc, True)) + shake_shake_update_func_conditional: List[str] = list() if shake_drop_prob_flag: shake_drop_prob = get_hyperparameter(max_shake_drop_probability, UniformFloatHyperparameter) cs.add_hyperparameter(shake_drop_prob) cs.add_condition(CS.EqualsCondition(shake_drop_prob, mb_choice, "shake-drop")) - if shake_shake_flag or shake_drop_prob_flag: - method = get_hyperparameter(shake_alpha_beta_method, CategoricalHyperparameter) + shake_shake_update_func_conditional.append('shake-drop') + if shake_shake_flag: + shake_shake_update_func_conditional.append('shake-shake') + if len(shake_shake_update_func_conditional) > 0: + method = get_hyperparameter(shake_shake_update_func, CategoricalHyperparameter) cs.add_hyperparameter(method) - cs.add_condition(CS.InCondition(method, mb_choice, ["shake-shake", "shake-drop"])) + cs.add_condition(CS.InCondition(method, mb_choice, shake_shake_update_func_conditional)) return cs diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index fefe85e7a..315badb5a 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -37,8 +37,8 @@ def forward( ctx: typing.Any, # No typing for AutogradContext x1: torch.Tensor, x2: torch.Tensor, - alpha: torch.tensor, - beta: torch.tensor, + alpha: torch.Tensor, + beta: torch.Tensor, ) -> torch.Tensor: ctx.save_for_backward(x1, x2, alpha, beta) @@ -66,10 +66,10 @@ def backward(ctx: typing.Any, class ShakeDropFunction(Function): @staticmethod def forward(ctx: typing.Any, - x: torch.tensor, - alpha: torch.tensor, - beta: torch.tensor, - bl: torch.tensor, + x: torch.Tensor, + alpha: torch.Tensor, + beta: 
torch.Tensor, + bl: torch.Tensor, ) -> torch.Tensor: ctx.save_for_backward(x, alpha, beta, bl) @@ -96,7 +96,7 @@ def shake_get_alpha_beta( is_training: bool, is_cuda: bool, method: str -) -> typing.Tuple[torch.tensor, torch.tensor]: +) -> typing.Tuple[torch.Tensor, torch.Tensor]: """ The methods used in this function have been introduced in 'ShakeShake Regularisation' https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. @@ -139,14 +139,14 @@ def shake_drop_get_bl( num_blocks: int, is_training: bool, is_cuda: bool -) -> torch.tensor: +) -> torch.Tensor: pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) if is_training: # Move to torch.randn(1) for reproducibility - bl = torch.tensor(1.0) if torch.randn(1) <= pl else torch.tensor(0.0) + bl = torch.Tensor(1.0) if torch.randn(1) <= pl else torch.Tensor(0.0) else: - bl = torch.tensor(pl) + bl = torch.Tensor(pl) if is_cuda: bl = bl.cuda() diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py index 98bd20a68..13543b5fc 100644 --- a/autoPyTorch/utils/common.py +++ b/autoPyTorch/utils/common.py @@ -96,7 +96,7 @@ def replace_prefix_in_config_dict(config: Dict[str, Any], prefix: str, replace: k.startswith(prefix)} -def custom_collate_fn(batch: List) -> List[Optional[torch.tensor]]: +def custom_collate_fn(batch: List) -> List[Optional[torch.Tensor]]: """ In the case of not providing a y tensor, in a dataset of form {X, y}, y would be None. diff --git a/requirements.txt b/requirements.txt index 2195e64b4..f4a913789 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ pandas -torch<=1.8 -torchvision<=0.9 +torch +torchvision tensorboard scikit-learn>=0.24.0,<0.25.0 numpy -scipy==1.6.3 +scipy lockfile imgaug>=0.4.0 ConfigSpace>=0.4.14,<0.5 pynisher>=0.6.3 pyrfr>=0.7,<0.9 -smac>=0.13.1,<0.14 +smac dask distributed>=2.2.0 catboost From 6953ee72d09863fe5a838c5c38a00775cae4266b Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 11 Oct 2021 15:11:03 +0200 Subject: [PATCH 39/54] Address comments from shuhei --- autoPyTorch/api/base_task.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 189919013..71fa82ded 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -840,10 +840,10 @@ def _search( if self.task_type != dataset.task_type: raise ValueError("Incompatible dataset entered for current task," - "expected dataset to have task type :{} got " + "expected dataset to have task type :{} but got " ":{}".format(self.task_type, dataset.task_type)) if precision not in [16, 32, 64]: - raise ValueError("precision must be one of 16, 32, 64. 
Got {}".format(precision)) + raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision)) # Initialise information needed for the experiment experiment_task_name: str = 'runSearch' @@ -1429,7 +1429,7 @@ def fit_ensemble( func_eval_time_limit_secs = time_for_task // 2 self._logger.warning( "Capping the func_eval_time_limit_secs to {} to have " - "time for a least 2 models to ensemble.".format( + "time for at least 2 models to ensemble.".format( func_eval_time_limit_secs ) ) From 4b7e75f6ecf815059e42076e93e1e1b519a2c4be Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 11 Oct 2021 17:32:36 +0200 Subject: [PATCH 40/54] make run_traditional_ml function --- autoPyTorch/api/base_task.py | 74 ++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 71fa82ded..623ffb1db 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -9,6 +9,7 @@ import tempfile import time import typing +from typing_extensions import runtime import unittest.mock import warnings from abc import abstractmethod @@ -746,6 +747,37 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: save_external=True) return + def run_traditional_ml( + self, + current_task_name: str, + runtime_limit: int, + func_eval_time_limit_secs: int) -> None: + """ + This function can be used to run the suite of traditional machine + learning models during the current task (for e.g, ensemble fit, search) + + Args: + current_task_name (str): name of the current task, + runtime_limit (int): time limit for fitting traditional models, + func_eval_time_limit_secs (int): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. + """ + assert self._logger is not None # for mypy compliancy + if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: + self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...") + else: + traditional_task_name = 'runTraditional' + self._stopwatch.start_task(traditional_task_name) + elapsed_time = self._stopwatch.wall_elapsed(current_task_name) + time_for_traditional = int(runtime_limit - elapsed_time) + self._do_traditional_prediction( + func_eval_time_limit_secs=func_eval_time_limit_secs, + time_left=time_for_traditional, + ) + self._stopwatch.stop_task(traditional_task_name) + def _search( self, optimize_metric: str, @@ -927,22 +959,12 @@ def _search( # ============> Run traditional ml # We only want to run traditional predictions in case we want to build an ensemble + # We want time for at least 1 Neural network in SMAC if enable_traditional_pipeline and self.ensemble_size > 0: - if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: - self._logger.warning("Traditional Pipeline is not enabled for regression. 
Skipping...") - else: - traditional_task_name = 'runTraditional' - self._stopwatch.start_task(traditional_task_name) - elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) - # We want time for at least 1 Neural network in SMAC - time_for_traditional = int( - self._time_for_task - elapsed_time - func_eval_time_limit_secs - ) - self._do_traditional_prediction( - func_eval_time_limit_secs=func_eval_time_limit_secs, - time_left=time_for_traditional, - ) - self._stopwatch.stop_task(traditional_task_name) + traditional_runtime_limit = int(self._time_for_task - func_eval_time_limit_secs) + self.run_traditional_ml(current_task_name=self.dataset_name, + runtime_limit=traditional_runtime_limit, + func_eval_time_limit_secs=func_eval_time_limit_secs) # ============> Starting ensemble self.precision = precision @@ -1433,29 +1455,17 @@ def fit_ensemble( func_eval_time_limit_secs ) ) - # We only want to run dummy predictions in case we want to build an ensemble + # ============> Run Dummy predictions dummy_task_name = 'runDummy' self._stopwatch.start_task(dummy_task_name) self._do_dummy_prediction() self._stopwatch.stop_task(dummy_task_name) # ============> Run traditional ml - # We only want to run traditional predictions in case we want to build an ensemble - if enable_traditional_pipeline and self.ensemble_size > 0: - if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS: - self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...") - else: - traditional_task_name = 'runTraditional' - self._stopwatch.start_task(traditional_task_name) - elapsed_time = self._stopwatch.wall_elapsed(ensemble_fit_task_name) - time_for_traditional = int( - time_for_task - elapsed_time - ) - self._do_traditional_prediction( - func_eval_time_limit_secs=func_eval_time_limit_secs, - time_left=time_for_traditional, - ) - self._stopwatch.stop_task(traditional_task_name) + if enable_traditional_pipeline: + self.run_traditional_ml(current_task_name=ensemble_fit_task_name, + runtime_limit=time_for_task, + func_eval_time_limit_secs=func_eval_time_limit_secs) elapsed_time = self._stopwatch.wall_elapsed(ensemble_fit_task_name) time_left_for_ensemble = int(time_for_task - elapsed_time) From cce21a6328916828d596496a76f398460fd2e1b6 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 11 Oct 2021 18:11:17 +0200 Subject: [PATCH 41/54] implement suggestion from shuhei and fix bug in rowcutmixtrainer --- .../setup/network_backbone/utils.py | 28 ++++++++----------- .../training/trainer/RowCutMixTrainer.py | 2 +- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 315badb5a..6aab99449 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -93,9 +93,9 @@ def backward(ctx: typing.Any, def shake_get_alpha_beta( - is_training: bool, - is_cuda: bool, - method: str + is_training: bool, + is_cuda: bool, + method: str ) -> typing.Tuple[torch.Tensor, torch.Tensor]: """ The methods used in this function have been introduced in 'ShakeShake Regularisation' @@ -108,15 +108,11 @@ def shake_get_alpha_beta( # TODO implement other update methods # alpha is the weight ratio for the forward pass and beta is that for the backward pass - if method == 'even-even': - alpha = torch.FloatTensor([0.5]) - else: - alpha = torch.rand(1) - - if method == 'shake-shake': - beta = 
torch.rand(1) - elif method in ['shake-even', 'even-even']: + alpha = torch.FloatTensor([0.5]) if method.startswith('even') else torch.rand(1) + if method.endswith('even'): beta = torch.FloatTensor([0.5]) + elif method.endswith('shake'): + beta = torch.rand(1) elif method == 'M3': # Table 4 in the paper `Shake-Shake regularization` rnd = torch.rand(1) @@ -134,11 +130,11 @@ def shake_get_alpha_beta( def shake_drop_get_bl( - block_index: int, - min_prob_no_shake: float, - num_blocks: int, - is_training: bool, - is_cuda: bool + block_index: int, + min_prob_no_shake: float, + num_blocks: int, + is_training: bool, + is_cuda: bool ) -> torch.Tensor: pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 409b07e9d..1c8a78d38 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -41,7 +41,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, # Replace the values in `cut_indices` columns with # the values from `permed_indices` - X[:, cut_indices] = X[shuffled_indices, :][:, cut_column_indices] + X[:, cut_column_indices] = X[shuffled_indices, :][:, cut_column_indices] # Since we cannot cut exactly `lam x 100 %` of rows, we need to adjust the `lam` lam = 1 - (len(cut_column_indices) / n_columns) From 4b5db0de51e59bd1770cf9d95d3f0e9830a1dd7e Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Mon, 11 Oct 2021 18:25:13 +0200 Subject: [PATCH 42/54] fix return type docstring --- autoPyTorch/data/tabular_feature_validator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index e5944da19..9f25956e2 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -320,11 +320,11 @@ def _get_columns_info( A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding Returns: - categorical_columns: (List[str]) + categorical_columns (List[str]) List of the names of categorical columns. - numerical_columns: (List[str]) + numerical_columns (List[str]) List of the names of numerical columns. - feat_type: + feat_type (List[str]) Type of each column numerical/categorical """ # Register if a column needs encoding From 80f1c1e3c84bf53207930f19f960b0428eac2f02 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 12 Oct 2021 00:24:05 +0200 Subject: [PATCH 43/54] add better documentation and fix bug in shake_drop_get_bl --- autoPyTorch/api/base_task.py | 2 +- autoPyTorch/data/tabular_feature_validator.py | 11 ++++------- .../setup/network_backbone/utils.py | 19 ++++++++++++++++++- .../training/trainer/RowCutMixTrainer.py | 5 +++-- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 623ffb1db..23a3bb854 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1423,7 +1423,7 @@ def fit_ensemble( "fit_ensemble().".format(self.__class__.__name__)) if precision not in [16, 32, 64]: - raise ValueError("precision must be one of 16, 32, 64. 
Got {}".format(precision)) + raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision)) if self._logger is None: self._logger = self._get_logger(self.dataset.dataset_name) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 9f25956e2..7305f9de7 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -29,11 +29,11 @@ def _create_column_transformer( creates a sklearn column transformer with appropriate columns associated with their preprocessors. Args: - preprocessors (Dict[str, List]): + preprocessors (Dict[str, List[BaseEstimator]]): Dictionary containing list of numerical and categorical preprocessors. - numerical_columns (List[int]): + numerical_columns (List[str]): List of names of numerical columns - categorical_columns (List[int]): + categorical_columns (List[str]): List of names of categorical columns Returns: ColumnTransformer @@ -135,7 +135,7 @@ def comparator(cmp1: str, cmp2: str) -> int: choices = ['categorical', 'numerical'] if cmp1 not in choices or cmp2 not in choices: raise ValueError('The comparator for the column order only accepts {}, ' - 'but got {} and {}'.format(choices, cmp1, cmp2)) + 'but got {} and {}'.format(choices, cmp1, cmp2)) idx1, idx2 = choices.index(cmp1), choices.index(cmp2) return idx1 - idx2 @@ -274,9 +274,6 @@ def _check_data( if exist_object_columns: X = self.infer_objects(X) - # Define the column to be encoded here as the feature validator is fitted once - # per estimator - # enc_columns, _ = self._get_columns_to_encode(X) column_order = [column for column in X.columns] if len(self.column_order) > 0: if self.column_order != column_order: diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 6aab99449..7b5287062 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -136,11 +136,28 @@ def shake_drop_get_bl( is_training: bool, is_cuda: bool ) -> torch.Tensor: + """ + The sampling of Bernoulli random variable + based on Eq. (4) in the paper + Args: + block_index (int): The index of the block from the input layer + min_prob_no_shake (float): The initial shake probability + num_blocks (int): The total number of building blocks + is_training (bool): Whether it is training + is_cuda (bool): Whether the tensor is on CUDA + Returns: + bl (torch.Tensor): a Bernoulli random variable in {0, 1} + Reference: + ShakeDrop Regularization for Deep Residual Learning + Yoshihiro Yamada et. al. 
(2020) + paper: https://arxiv.org/pdf/1802.02375.pdf + implementation: https://github.com/imenurok/ShakeDrop + """ pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) if is_training: # Move to torch.randn(1) for reproducibility - bl = torch.Tensor(1.0) if torch.randn(1) <= pl else torch.Tensor(0.0) + bl = torch.Tensor(1.0) if torch.rand(1) <= pl else torch.Tensor(0.0) else: bl = torch.Tensor(pl) diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 1c8a78d38..00012c711 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -36,8 +36,9 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, if beta <= 0 or r > self.alpha: return X, {'y_a': y, 'y_b': y[shuffled_indices], 'lam': 1} - cut_column_indices = torch.tensor(self.random_state.choice(range(n_columns), max(1, np.int32(n_columns * lam)), - replace=False)) + cut_column_indices = torch.tensor(self.random_state.choice(range(n_columns), + max(1, np.int32(n_columns * lam)), + replace=False)) # Replace the values in `cut_indices` columns with # the values from `permed_indices` From dc01cd3b9b669f0e81f1c0e77f869317a20bbd54 Mon Sep 17 00:00:00 2001 From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com> Date: Tue, 12 Oct 2021 10:26:50 +0200 Subject: [PATCH 44/54] Apply suggestions from code review Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com> --- .../setup/network_backbone/utils.py | 2 +- test/test_data/test_feature_validator.py | 62 ++++++++----------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index 7b5287062..ea0a3c9d0 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -100,7 +100,7 @@ def shake_get_alpha_beta( """ The methods used in this function have been introduced in 'ShakeShake Regularisation' https://arxiv.org/abs/1705.07485. The names have been taken from the paper as well. - Currently, this function supports `even-even`, `shake-even` and `shake-shake` + Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`. """ if not is_training: result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 535023cd2..c7b817e0f 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -317,14 +317,23 @@ def test_featurevalidator_get_columns_to_encode(): assert feature_types == ['numerical', 'numerical', 'categorical', 'categorical'] -def test_featurevalidator_remove_nan_catcolumns(): +def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd.DataFrame, + ans_train: np.ndarray, ans_test: np.ndarray) -> None: + validator = TabularFeatureValidator() + validator.fit(df_train) + transformed_df_train = validator.transform(df_train) + transformed_df_test = validator.transform(df_test) + + assert np.array_equal(transformed_df_train, ans_train) + assert np.array_equal(transformed_df_test, ans_test) + + +def test_feature_validator_remove_nan_catcolumns(): """ Make sure categorical columns that have only nan values are removed. 
""" - # First case, there exist null columns in the train set - # and the same columns are not all null for the test set. - validator = TabularFeatureValidator() - + # First case, there exist null columns (B and C) in the train set + # and a same column (C) are not all null for the test set. df_train = pd.DataFrame( [ {'A': 1, 'B': np.nan, 'C': np.nan}, @@ -333,6 +342,7 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) + ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64) df_test = pd.DataFrame( [ {'A': np.nan, 'B': np.nan, 'C': 5}, @@ -341,18 +351,11 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) + ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) - validator.fit(df_train) - transformed_df_train = validator.transform(df_train) - transformed_df_test = validator.transform(df_test) - - assert np.array_equal(transformed_df_train, np.array([[0, 1], [1, 0], [0, 1]], dtype=float)) - assert np.array_equal(transformed_df_test, np.array([[1, 0], [1, 0], [0, 1]], dtype=float)) - - # Second case, there exist null columns in the training set and the same - # are null in the test set. - validator = TabularFeatureValidator() - + # Second case, there exist null columns (B and C) in the training set and + # the same columns (B and C) are null in the test set. df_train = pd.DataFrame( [ {'A': 1, 'B': np.nan, 'C': np.nan}, @@ -361,6 +364,7 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) + ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64) df_test = pd.DataFrame( [ {'A': np.nan, 'B': np.nan, 'C': np.nan}, @@ -369,18 +373,11 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) + ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) - validator.fit(df_train) - transformed_df_train = validator.transform(df_train) - transformed_df_test = validator.transform(df_test) - - assert np.array_equal(transformed_df_train, np.array([[0, 1], [1, 0], [0, 1]], dtype=float)) - assert np.array_equal(transformed_df_test, np.array([[1, 0], [1, 0], [0, 1]], dtype=float)) - - # Third case, there exist no null columns in the training set and a - # few null columns exist in the test set. - validator = TabularFeatureValidator() - + # Third case, there exist no null columns in the training set and + # null columns exist in the test set. 
df_train = pd.DataFrame( [ {'A': 1, 'B': 1}, @@ -388,6 +385,7 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) + ans_train = np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=np.float64) df_test = pd.DataFrame( [ {'A': np.nan, 'B': np.nan}, @@ -395,14 +393,8 @@ def test_featurevalidator_remove_nan_catcolumns(): ], dtype='category', ) - - validator.fit(df_train) - transformed_df_train = validator.transform(df_train) - transformed_df_test = validator.transform(df_test) - - assert np.array_equal(transformed_df_train, np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=float)) - assert np.array_equal(transformed_df_test, np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=float)) - + ans_test = np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.float64) + feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) def test_features_unsupported_calls_are_raised(): """ From f0c2aa04b9521d59a371d263596dc573f3b4339d Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 12 Oct 2021 10:53:40 +0200 Subject: [PATCH 45/54] add test for comparator and other improvements based on PR comments --- autoPyTorch/api/base_task.py | 8 ++--- autoPyTorch/data/base_target_validator.py | 6 ++-- autoPyTorch/data/tabular_feature_validator.py | 34 +++++++++++++------ autoPyTorch/pipeline/base_pipeline.py | 31 +++++++++-------- .../training/trainer/RowCutMixTrainer.py | 2 +- test/test_data/test_feature_validator.py | 30 ++++++++++++++++ 6 files changed, 79 insertions(+), 32 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 23a3bb854..a85695801 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -9,7 +9,6 @@ import tempfile import time import typing -from typing_extensions import runtime import unittest.mock import warnings from abc import abstractmethod @@ -751,13 +750,14 @@ def run_traditional_ml( self, current_task_name: str, runtime_limit: int, - func_eval_time_limit_secs: int) -> None: + func_eval_time_limit_secs: int + ) -> None: """ This function can be used to run the suite of traditional machine - learning models during the current task (for e.g, ensemble fit, search) + learning models during the current task (for e.g, ensemble fit, search) Args: - current_task_name (str): name of the current task, + current_task_name (str): name of the current task, runtime_limit (int): time limit for fitting traditional models, func_eval_time_limit_secs (int): Time limit for a single call to the machine learning model. 
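The run_traditional_ml helper refactored above drives the traditional-model warm start from both _search and fit_ensemble. The standalone sketch below is not part of the patch series; it only condenses, with illustrative names, the time-budgeting arithmetic those call sites use: reserve one function-evaluation slot so SMAC can still fit at least one neural network, then subtract whatever the stopwatch has already consumed in the current task.

def traditional_ml_budget(total_task_time: int,
                          elapsed_in_task: float,
                          func_eval_time_limit_secs: int) -> int:
    # Reserve one evaluation slot for a neural network, as done before
    # calling run_traditional_ml() from the search path.
    runtime_limit = int(total_task_time - func_eval_time_limit_secs)
    # run_traditional_ml() then subtracts the wall-clock time already spent
    # in the current task before handing the rest to the traditional models.
    return int(runtime_limit - elapsed_in_task)


# e.g. a 300s task, 20s already elapsed, 50s per model evaluation -> 230s left
print(traditional_ml_budget(300, 20.0, 50))
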
diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py index 0fb318476..c88dc5e9b 100644 --- a/autoPyTorch/data/base_target_validator.py +++ b/autoPyTorch/data/base_target_validator.py @@ -43,8 +43,10 @@ class BaseTargetValidator(BaseEstimator): """ def __init__(self, is_classification: bool = False, - logger: Optional[Union[PicklableClientLogger, logging.Logger - ]] = None, + logger: Optional[Union[PicklableClientLogger, + logging.Logger + ] + ] = None, ) -> None: self.is_classification = is_classification diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 7305f9de7..ba0687c13 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -78,6 +78,29 @@ def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: class TabularFeatureValidator(BaseFeatureValidator): + @staticmethod + def _comparator(cmp1: str, cmp2: str) -> int: + """Order so that categorical columns come right and numerical columns come left + + Args: + cmp1 (str): First variable to compare + cmp2 (str): Second variable to compare + + Raises: + ValueError: if the values of the variables to compare + are not in 'categorical' or 'numerical' + + Returns: + int: either [0, -1, 1] + """ + choices = ['categorical', 'numerical'] + if cmp1 not in choices or cmp2 not in choices: + raise ValueError('The comparator for the column order only accepts {}, ' + 'but got {} and {}'.format(choices, cmp1, cmp2)) + + idx1, idx2 = choices.index(cmp1), choices.index(cmp2) + return idx1 - idx2 + def _fit( self, X: SUPPORTED_FEAT_TYPES, @@ -130,19 +153,10 @@ def _fit( # The column transformer reorders the feature types # therefore, we need to change the order of columns as well # This means categorical columns are shifted to the right - def comparator(cmp1: str, cmp2: str) -> int: - """ Order so that categorical columns come right and numerical columns come left """ - choices = ['categorical', 'numerical'] - if cmp1 not in choices or cmp2 not in choices: - raise ValueError('The comparator for the column order only accepts {}, ' - 'but got {} and {}'.format(choices, cmp1, cmp2)) - - idx1, idx2 = choices.index(cmp1), choices.index(cmp2) - return idx1 - idx2 self.feat_type = sorted( feat_type, - key=functools.cmp_to_key(comparator) + key=functools.cmp_to_key(self._comparator) ) # differently to categorical_columns and numerical_columns, diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 7d4fd17a9..d98be9bd4 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -400,6 +400,7 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], raise ValueError("Unknown node name. Expected update node name to be in {} " "got {}".format(self.named_steps.keys(), update.node_name)) node = self.named_steps[update.node_name] + node_name = node.__class__.__name__ # if node is a choice module if hasattr(node, 'get_components'): split_hyperparameter = update.hyperparameter.split(':') @@ -429,16 +430,16 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], if choice not in components.keys(): raise ValueError("Unknown component choice for node {}. 
" "Expected update hyperparameter " - "to be in {}, but got {}".format(node.__class__.__name__, - components.keys(), choice)) + "to be in {}, but got {}".format(node_name, + components.keys(), choice)) # check if the component whose hyperparameter # needs to be updated is in components of the # choice module elif split_hyperparameter[0] not in components.keys(): raise ValueError("Unknown component choice for node {}. " "Expected update component " - "to be in {}, but got {}".format(node.__class__.__name__, - components.keys(), split_hyperparameter[0])) + "to be in {}, but got {}".format(node_name, + components.keys(), split_hyperparameter[0])) else: # check if hyperparameter is in the search space of the component component = components[split_hyperparameter[0]] @@ -451,15 +452,15 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], component.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue + component_hyperparameters = component.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties).get_hyperparameter_names() raise ValueError("Unknown hyperparameter for component {} of node {}." " Expected update hyperparameter " "to be in {}, but got {}.".format(component.__name__, - node.__class__.__name__, - component.get_hyperparameter_search_space( - dataset_properties=self.dataset_properties - ).get_hyperparameter_names(), - split_hyperparameter[1] - ) + node_name, + component_hyperparameters, + split_hyperparameter[1] + ) ) else: if update.hyperparameter not in node.get_hyperparameter_search_space( @@ -468,13 +469,13 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]], node.get_hyperparameter_search_space( dataset_properties=self.dataset_properties).get_hyperparameter_names()]): continue + node_hyperparameters = node.get_hyperparameter_search_space( + dataset_properties=self.dataset_properties).get_hyperparameter_names() raise ValueError("Unknown hyperparameter for node {}. " "Expected update hyperparameter " - "to be in {}, but got {}".format(node.__class__.__name__, - node. - get_hyperparameter_search_space( - dataset_properties=self.dataset_properties). 
- get_hyperparameter_names(), update.hyperparameter)) + "to be in {}, but got {}".format(node_name, + node_hyperparameters, + update.hyperparameter)) def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]] ) -> List[Tuple[str, autoPyTorchChoice]]: diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index 00012c711..f85cf253f 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -29,7 +29,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, beta = 1.0 lam = self.random_state.beta(beta, beta) batch_size, n_columns = np.shape(X) - # shuffled_indices: Shuffled version of torch.arange(batch_size) + # shuffled_indices: Shuffled version of torch.arange(batch_size) shuffled_indices = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) r = self.random_state.rand(1) diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index c7b817e0f..54570b7a8 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -1,4 +1,5 @@ import copy +import functools import numpy as np @@ -331,6 +332,11 @@ def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd. def test_feature_validator_remove_nan_catcolumns(): """ Make sure categorical columns that have only nan values are removed. + The ans arrays contain the final output after calling transform on + datasets, this includes fitting and transforming a column transformer + containing simple imputation for both categorical and numerical + columns, scaling for numerical columns and one hot encoding for + categorical columns. """ # First case, there exist null columns (B and C) in the train set # and a same column (C) are not all null for the test set. 
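The docstring added in the hunk above summarises what the validator's default transform does: imputation for all columns, scaling for numerical columns, and one-hot encoding for categorical columns. The snippet below is only a self-contained approximation of that behaviour with plain scikit-learn (within the version range pinned in requirements.txt); the column names, data, and imputation strategies are illustrative assumptions, and it does not reproduce the all-NaN-column handling exercised by these tests.

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Illustrative imputation choices; the validator's actual strategies may differ.
# Categorical values are numeric here, so a constant fill of -1 is safe to encode.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy='constant', fill_value=-1),
    OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore'),
)
numerical_pipeline = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
)

df = pd.DataFrame({'A': [1.0, np.nan, 2.0],   # treated as categorical
                   'B': [1.5, 3.0, np.nan]})  # treated as numerical
transformer = ColumnTransformer([
    ('categorical_pipeline', categorical_pipeline, ['A']),
    ('numerical_pipeline', numerical_pipeline, ['B']),
])
# Three one-hot columns for A (categories -1, 1, 2) followed by the scaled B column.
print(transformer.fit_transform(df))
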
@@ -396,6 +402,7 @@ def test_feature_validator_remove_nan_catcolumns(): ans_test = np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.float64) feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test) + def test_features_unsupported_calls_are_raised(): """ Makes sure we raise a proper message to the user, @@ -664,3 +671,26 @@ def test_feature_validator_imbalanced_data(): transformed_X_test = validator.transform(X_test) transformed_X_test = pd.DataFrame(transformed_X_test) assert not len(validator.all_nan_columns) + + +def test_comparator(): + numerical = 'numerical' + categorical = 'categorical' + + validator = TabularFeatureValidator + + feat_type = [numerical, categorical] * 10 + ans = [categorical] * 10 + [numerical] * 10 + feat_type = sorted( + feat_type, + key=functools.cmp_to_key(validator._comparator) + ) + assert ans == feat_type + + feat_type = [numerical] * 10 + [categorical] * 10 + ans = [categorical] * 10 + [numerical] * 10 + feat_type = sorted( + feat_type, + key=functools.cmp_to_key(validator._comparator) + ) + assert ans == feat_type From 57111e9c9479fa982974d145b1da5f6c3d71e8d3 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Tue, 12 Oct 2021 11:02:56 +0200 Subject: [PATCH 46/54] fix bug in test --- test/test_data/test_feature_validator.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 54570b7a8..ae9b7102c 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -312,10 +312,13 @@ def test_featurevalidator_get_columns_to_encode(): for col in df.columns: df[col] = df[col].astype(col) - enc_columns, feature_types = validator._get_columns_to_encode(df) + validator.fit(df) - assert enc_columns == ['category', 'bool'] - assert feature_types == ['numerical', 'numerical', 'categorical', 'categorical'] + categorical_columns, numerical_columns, feat_type = validator._get_columns_info(df) + + assert numerical_columns == ['int', 'float'] + assert categorical_columns == ['category', 'bool'] + assert feat_type == ['numerical', 'numerical', 'categorical', 'categorical'] def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd.DataFrame, From 153878f9170230d069566adbe6bfb946bb49c4fb Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Oct 2021 19:49:11 +0200 Subject: [PATCH 47/54] [fix] Fix the condition in the raising error of all_nan_columns --- autoPyTorch/data/tabular_feature_validator.py | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index ba0687c13..e20a29e6f 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -291,33 +291,27 @@ def _check_data( column_order = [column for column in X.columns] if len(self.column_order) > 0: if self.column_order != column_order: - raise ValueError("Changing the column order of the features after fit() is " - "not supported. 
Fit() method was called with " - "{} whereas the new features have {} as type".format(self.column_order, - column_order, ) - ) + raise ValueError("The column order of the features must not be changed after fit(), but" + " the column order are different between training ({}) and" + " test ({}) datasets.".format(self.column_order, column_order)) else: self.column_order = column_order dtypes = [dtype.name for dtype in X.dtypes] - if len(self.dtypes) > 0: - dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)] - if any(dtypes_diff): - if self.all_nan_columns is not None and len(self.all_nan_columns) > 0: - if len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0: - # we expect the dtypes to only be different if the column belongs - # to all_nan_columns as these columns would be imputed. if there is - # a value in the test set for a column in all_nan_columns, pandas - # does not recognise the dtype of the test column properly - raise ValueError("Changing the dtype of the features after fit() is " - "not supported. The dtype of some columns are different " - "between training and test datasets. Fit() method was called with " - "{} whereas the new features have {} as type".format(self.dtypes, - dtypes, - ) - ) - else: + + dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)] + if len(self.dtypes) == 0: self.dtypes = dtypes + elif ( + any(dtypes_diff) # the dtypes of some columns are different in train and test dataset + and self.all_nan_columns is not None # Ignore all_nan_columns is None + and len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0 + ): + # The dtypes can be different if and only if the column belongs + # to all_nan_columns as these columns would be imputed. 
+ raise ValueError("The dtype of the features must not be changed after fit(), but" + " the dtypes of some columns are different between training ({}) and" + " test ({}) datasets.".format(self.dtypes, dtypes)) def _get_columns_info( self, @@ -350,7 +344,7 @@ def _get_columns_info( continue column_dtype = self.dtypes[i] err_msg = "Valid types are `numerical`, `categorical` or `boolean`, " \ - "but input Column {} has an invalid type `{}`.".format(column, column_dtype) + "but input column {} has an invalid type `{}`.".format(column, column_dtype) if column_dtype in ['category', 'bool']: categorical_columns.append(column) feat_type.append('categorical') From 64862fef618cc08ce5310bcfcf093e72637d756f Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Oct 2021 19:52:50 +0200 Subject: [PATCH 48/54] [refactor] Unite name conventions of numpy array and pandas dataframe --- autoPyTorch/data/base_feature_validator.py | 4 ++-- autoPyTorch/data/tabular_feature_validator.py | 10 +++++----- test/test_data/test_feature_validator.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py index 9ed46d6e6..420f0808c 100644 --- a/autoPyTorch/data/base_feature_validator.py +++ b/autoPyTorch/data/base_feature_validator.py @@ -89,7 +89,7 @@ def fit( # If a list was provided, it will be converted to pandas if isinstance(X_train, list): - X_train, X_test = self.list_to_dataframe(X_train, X_test) + X_train, X_test = self.list_to_pandas(X_train, X_test) self._check_data(X_train) @@ -158,7 +158,7 @@ def transform( raise NotImplementedError() - def list_to_dataframe( + def list_to_pandas( self, X_train: SUPPORTED_FEAT_TYPES, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index e20a29e6f..5f3bdc787 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -123,7 +123,7 @@ def _fit( # gives us information about the column dtype if isinstance(X, np.ndarray): - X = self.numpy_array_to_pandas(X) + X = self.numpy_to_pandas(X) # Replace the data type from the previously saved type. 
self.data_type = type(X) # save all the information about the column order and data types @@ -194,10 +194,10 @@ def transform( # If a list was provided, it will be converted to pandas if isinstance(X, list): - X, _ = self.list_to_dataframe(X) + X, _ = self.list_to_pandas(X) if isinstance(X, np.ndarray): - X = self.numpy_array_to_pandas(X) + X = self.numpy_to_pandas(X) if hasattr(X, "iloc") and not scipy.sparse.issparse(X): X = cast(pd.DataFrame, X) @@ -380,7 +380,7 @@ def _get_columns_info( return categorical_columns, numerical_columns, feat_type - def list_to_dataframe( + def list_to_pandas( self, X_train: SUPPORTED_FEAT_TYPES, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, @@ -418,7 +418,7 @@ def list_to_dataframe( return X_train, X_test @staticmethod - def numpy_array_to_pandas( + def numpy_to_pandas( X: np.ndarray, ) -> pd.DataFrame: """ diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index ae9b7102c..2b388f2e3 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -284,9 +284,9 @@ def test_featurevalidator_fitontypeA_transformtypeB(input_data_featuretest): if isinstance(input_data_featuretest, pd.DataFrame): pytest.skip("Column order change in pandas is not supported") elif isinstance(input_data_featuretest, np.ndarray): - complementary_type = validator.numpy_array_to_pandas(input_data_featuretest) + complementary_type = validator.numpy_to_pandas(input_data_featuretest) elif isinstance(input_data_featuretest, list): - complementary_type, _ = validator.list_to_dataframe(input_data_featuretest) + complementary_type, _ = validator.list_to_pandas(input_data_featuretest) elif sparse.issparse(input_data_featuretest): complementary_type = sparse.csr_matrix(input_data_featuretest.todense()) else: From 410c7fe39afc25f6b0894398ad6dcfa1f031c1df Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Oct 2021 20:39:42 +0200 Subject: [PATCH 49/54] [doc] Add the description about the tabular feature transformation --- test/test_data/test_feature_validator.py | 33 ++++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 2b388f2e3..3a70549b0 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -335,14 +335,37 @@ def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd. def test_feature_validator_remove_nan_catcolumns(): """ Make sure categorical columns that have only nan values are removed. - The ans arrays contain the final output after calling transform on - datasets, this includes fitting and transforming a column transformer - containing simple imputation for both categorical and numerical - columns, scaling for numerical columns and one hot encoding for - categorical columns. + Transform performs the folloing: + * simple imputation for both + * scaling for numerical + * one-hot encoding for categorical + For example, + data = [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'B': 3, 'C': np.nan}, + {'A': 2, 'B': np.nan, 'C': np.nan} + ] + and suppose all the columns are categorical, + then + * `A` in {np.nan, 1, 2} + * `B` in {np.nan, 3} + * `C` in {np.nan} <=== it will be dropped. 
+ + So in the column A, + * np.nan ==> [1, 0, 0] + * 1 ==> [0, 1, 0] + * 2 ==> [0, 0, 1] + in the column B, + * np.nan ==> [1, 0] + * 3 ==> [0, 1] + Therefore, by concatenating, + * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0] + * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1] + * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0] """ # First case, there exist null columns (B and C) in the train set # and a same column (C) are not all null for the test set. + df_train = pd.DataFrame( [ {'A': 1, 'B': np.nan, 'C': np.nan}, From baa7ab87fe64cf6d8e1f9638ff029147e6b2a310 Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 12 Oct 2021 21:01:45 +0200 Subject: [PATCH 50/54] [doc] Add the description of the tabular feature transformation --- autoPyTorch/data/tabular_feature_validator.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 5f3bdc787..ae449fa14 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -188,6 +188,41 @@ def transform( Return: np.ndarray: The transformed array + + Note: + The default transform performs the folloing: + * simple imputation for both + * scaling for numerical + * one-hot encoding for categorical + For example, here is a simple case + of which all the columns are categorical. + data = [ + {'A': 1, 'B': np.nan, 'C': np.nan}, + {'A': np.nan, 'B': 3, 'C': np.nan}, + {'A': 2, 'B': np.nan, 'C': np.nan} + ] + and suppose all the columns are categorical, + then + * `A` in {np.nan, 1, 2} + * `B` in {np.nan, 3} + * `C` in {np.nan} <=== it will be dropped. + + So in the column A, + * np.nan ==> [1, 0, 0] (always the index 0) + * 1 ==> [0, 1, 0] + * 2 ==> [0, 0, 1] + in the column B, + * np.nan ==> [1, 0] + * 3 ==> [0, 1] + Therefore, by concatenating, + * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0] + * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1] + * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0] + ==> [ + [0, 1, 0, 1, 0], + [1, 0, 0, 0, 1], + [0, 0, 1, 1, 0] + ] """ if not self._is_fitted: raise NotFittedError("Cannot call transform on a validator that is not fitted") From e1eb8547ac45e0747527a3d33c307837eb21e9ab Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 20 Oct 2021 11:31:56 +0200 Subject: [PATCH 51/54] address comments from arlind --- autoPyTorch/data/tabular_feature_validator.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index ae449fa14..62f571037 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -55,7 +55,7 @@ def _create_column_transformer( def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: """ - This function creates a Dictionary containing list + This function creates a Dictionary containing a list of numerical and categorical preprocessors Returns: Dict[str, List[BaseEstimator]] @@ -80,7 +80,7 @@ class TabularFeatureValidator(BaseFeatureValidator): @staticmethod def _comparator(cmp1: str, cmp2: str) -> int: - """Order so that categorical columns come right and numerical columns come left + """Order so that categorical columns come left and numerical columns come right Args: cmp1 (str): First variable to compare @@ -97,7 +97,6 @@ def _comparator(cmp1: str, cmp2: str) -> int: if cmp1 not in choices or cmp2 not in choices: 
raise ValueError('The comparator for the column order only accepts {}, ' 'but got {} and {}'.format(choices, cmp1, cmp2)) - idx1, idx2 = choices.index(cmp1), choices.index(cmp2) return idx1 - idx2 @@ -152,7 +151,7 @@ def _fit( # The column transformer reorders the feature types # therefore, we need to change the order of columns as well - # This means categorical columns are shifted to the right + # This means categorical columns are shifted to the left self.feat_type = sorted( feat_type, From 4545fdbf08b8abc62df1a6a3d8c54c4737ea6170 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 20 Oct 2021 11:41:26 +0200 Subject: [PATCH 52/54] address comments from arlind --- autoPyTorch/data/tabular_feature_validator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 62f571037..669576b9c 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -57,6 +57,7 @@ def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]: """ This function creates a Dictionary containing a list of numerical and categorical preprocessors + Returns: Dict[str, List[BaseEstimator]] """ From 8519a48191d58fef95645c14c375a4ccf94687cb Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 20 Oct 2021 11:50:34 +0200 Subject: [PATCH 53/54] change to as_tensor and address comments from arlind --- .../pipeline/components/setup/network_backbone/utils.py | 5 +++-- .../pipeline/components/training/trainer/RowCutMixTrainer.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py index ea0a3c9d0..96390d003 100644 --- a/autoPyTorch/pipeline/components/setup/network_backbone/utils.py +++ b/autoPyTorch/pipeline/components/setup/network_backbone/utils.py @@ -28,6 +28,7 @@ def get_output_shape(network: torch.nn.Module, input_shape: typing.Tuple[int, .. 
placeholder = torch.randn((2, *input_shape), dtype=torch.float) with torch.no_grad(): output = network(placeholder) + return tuple(output.shape[1:]) @@ -157,9 +158,9 @@ def shake_drop_get_bl( if is_training: # Move to torch.randn(1) for reproducibility - bl = torch.Tensor(1.0) if torch.rand(1) <= pl else torch.Tensor(0.0) + bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0) else: - bl = torch.Tensor(pl) + bl = torch.as_tensor(pl) if is_cuda: bl = bl.cuda() diff --git a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py index f85cf253f..67de50108 100644 --- a/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py @@ -36,7 +36,7 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray, if beta <= 0 or r > self.alpha: return X, {'y_a': y, 'y_b': y[shuffled_indices], 'lam': 1} - cut_column_indices = torch.tensor(self.random_state.choice(range(n_columns), + cut_column_indices = torch.as_tensor(self.random_state.choice(range(n_columns), max(1, np.int32(n_columns * lam)), replace=False)) From 2c3a525ac6fb6fbcb066ef051dfce69ab9472d66 Mon Sep 17 00:00:00 2001 From: Ravin Kohli Date: Wed, 20 Oct 2021 11:56:43 +0200 Subject: [PATCH 54/54] correct description for functions in data module --- autoPyTorch/data/base_feature_validator.py | 10 ++++----- autoPyTorch/data/base_target_validator.py | 8 +++---- autoPyTorch/data/base_validator.py | 4 ++-- autoPyTorch/data/tabular_feature_validator.py | 22 +++++++++---------- autoPyTorch/data/tabular_target_validator.py | 8 +++---- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py index 420f0808c..a7cab5913 100644 --- a/autoPyTorch/data/base_feature_validator.py +++ b/autoPyTorch/data/base_feature_validator.py @@ -79,7 +79,7 @@ def fit( The supported data types are List, numpy arrays and pandas DataFrames. 
CSR sparse data types are also supported - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -115,7 +115,7 @@ def _fit( X: SUPPORTED_FEAT_TYPES, ) -> BaseEstimator: """ - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -133,7 +133,7 @@ def _check_data( """ Feature dimensionality and data type checks - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -146,7 +146,7 @@ def transform( X: SUPPORTED_FEAT_TYPES, ) -> np.ndarray: """ - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features, whose categorical features are going to be transformed @@ -168,7 +168,7 @@ def list_to_pandas( If test data is provided, we proactively match it to train data - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py index c88dc5e9b..f191e985b 100644 --- a/autoPyTorch/data/base_target_validator.py +++ b/autoPyTorch/data/base_target_validator.py @@ -75,7 +75,7 @@ def fit( Validates and fit a categorical encoder (if needed) to the targets The supported data types are List, numpy arrays and pandas DataFrames. - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) A set of targets set aside for training y_test (Union[SUPPORTED_TARGET_TYPES]) @@ -132,7 +132,7 @@ def _fit( y_test: Optional[SUPPORTED_TARGET_TYPES] = None, ) -> BaseEstimator: """ - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) The labels of the current task. They are going to be encoded in case of classification @@ -146,7 +146,7 @@ def transform( y: Union[SUPPORTED_TARGET_TYPES], ) -> np.ndarray: """ - Arguments: + Args: y (SUPPORTED_TARGET_TYPES) A set of targets that are going to be encoded if the current task is classification @@ -163,7 +163,7 @@ def inverse_transform( """ Revert any encoding transformation done on a target array - Arguments: + Args: y (Union[np.ndarray, pd.DataFrame, pd.Series]): Target array to be transformed back to original form before encoding Returns: diff --git a/autoPyTorch/data/base_validator.py b/autoPyTorch/data/base_validator.py index 7528d56ab..4ef54c665 100644 --- a/autoPyTorch/data/base_validator.py +++ b/autoPyTorch/data/base_validator.py @@ -58,7 +58,7 @@ def fit( + Checks for dimensionality as well as missing values are performed. + If performing a classification task, the data is going to be encoded - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks). 
If this data contains categorical columns, an encoder is going to @@ -102,7 +102,7 @@ def transform( """ Transform the given target or features to a numpy array - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features to transform y (typing.Optional[SUPPORTED_TARGET_TYPES]): diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py index 669576b9c..62bd0b465 100644 --- a/autoPyTorch/data/tabular_feature_validator.py +++ b/autoPyTorch/data/tabular_feature_validator.py @@ -110,7 +110,7 @@ def _fit( features (from categorical for example) to a numerical value that further stages will be able to use - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -180,7 +180,7 @@ def transform( Validates and fit a categorical encoder (if needed) to the features. The supported data types are List, numpy arrays and pandas DataFrames. - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features, whose categorical features are going to be transformed @@ -279,7 +279,7 @@ def _check_data( """ Feature dimensionality and data type checks - Arguments: + Args: X (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -355,7 +355,7 @@ def _get_columns_info( """ Return the columns to be encoded from a pandas dataframe - Arguments: + Args: X (pd.DataFrame) A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -425,7 +425,7 @@ def list_to_pandas( If test data is provided, we proactively match it to train data - Arguments: + Args: X_train (SUPPORTED_FEAT_TYPES): A set of features that are going to be validated (type and dimensionality checks) and a encoder fitted in the case the data needs encoding @@ -459,7 +459,7 @@ def numpy_to_pandas( """ Converts a numpy array to pandas for type inference - Arguments: + Args: X (np.ndarray): data to be interpreted. @@ -474,7 +474,7 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame: This has to be done once, so the test and train data are treated equally - Arguments: + Args: X (pd.DataFrame): data to be interpreted. @@ -512,12 +512,12 @@ def has_object_columns( """ Indicate whether on a Series of dtypes for a Pandas DataFrame there exists one or more object columns. + + Args: + feature_types (pd.Series): The feature types for a DataFrame. - Arguments: - feature_types (pd.Series): - The feature types for a DataFrame. Returns: - bool: + bool: True if the DataFrame dtypes contain an object column, False otherwise. """ diff --git a/autoPyTorch/data/tabular_target_validator.py b/autoPyTorch/data/tabular_target_validator.py index 239791768..7cbd88c38 100644 --- a/autoPyTorch/data/tabular_target_validator.py +++ b/autoPyTorch/data/tabular_target_validator.py @@ -28,7 +28,7 @@ def _fit( It does so by also using the classes from the test data, to prevent encoding errors - Arguments: + Args: y_train (SUPPORTED_TARGET_TYPES) The labels of the current task. They are going to be encoded in case of classification @@ -100,7 +100,7 @@ def transform( Validates and fit a categorical encoder (if needed) to the features. The supported data types are List, numpy arrays and pandas DataFrames. 
- Arguments: + Args: y (SUPPORTED_TARGET_TYPES) A set of targets that are going to be encoded if the current task is classification @@ -152,7 +152,7 @@ def inverse_transform( """ Revert any encoding transformation done on a target array - Arguments: + Args: y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): Target array to be transformed back to original form before encoding Returns: @@ -189,7 +189,7 @@ def _check_data( """ Perform dimensionality and data type checks on the targets - Arguments: + Args: y (typing.Union[np.ndarray, pd.DataFrame, pd.Series]): A set of features whose dimensionality and data type is going to be checked """
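
Note on the first hunk above: it completes a shape-inference helper that pushes a small random placeholder batch through the network under torch.no_grad() and strips the batch dimension from the result. A minimal sketch of that pattern, with the wrapper signature and the toy network assumed for illustration rather than copied from the repository:

    import torch
    from torch import nn
    from typing import Tuple

    def get_output_shape(network: nn.Module, input_shape: Tuple[int, ...]) -> Tuple[int, ...]:
        # Run a throw-away batch of two random samples through the network
        # and report the per-sample output shape (batch dimension stripped).
        placeholder = torch.randn((2, *input_shape), dtype=torch.float)
        with torch.no_grad():
            output = network(placeholder)
        return tuple(output.shape[1:])

    # Illustrative usage with a toy network that is not part of autoPyTorch.
    net = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 4))
    print(get_output_shape(net, (10,)))  # (4,)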
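
Note on the shake_drop_get_bl and RowCutMixTrainer hunks: they swap the legacy torch.Tensor constructor for torch.as_tensor. The legacy constructor treats a bare number as a size rather than a value, so it is not a reliable way to build the scalar gate tensor, while torch.as_tensor wraps the given value and reuses the memory of an existing numpy array when the dtype already matches. A short sketch of the difference; pl and rng below are made-up illustration values:

    import numpy as np
    import torch

    # Legacy constructor: the argument is interpreted as a size, not a value.
    uninitialised = torch.Tensor(3)
    print(uninitialised.shape)   # torch.Size([3]), contents are arbitrary

    # as_tensor wraps the value itself, giving the 0-dim gate tensor shake-drop needs.
    pl = 0.5
    bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0)
    print(bl)                    # tensor(1.) or tensor(0.)

    # For CutMix, as_tensor converts the sampled column indices without copying
    # when the numpy dtype already matches.
    rng = np.random.RandomState(0)
    cut_column_indices = torch.as_tensor(rng.choice(range(10), size=3, replace=False))
    print(cut_column_indices)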
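
The remaining hunks in this patch move the data-module docstrings from an Arguments: heading to Google-style Args: sections. For reference, a hypothetical method documented in that style; the names and types are illustrative, not copied from the patch:

    def transform(self, X: "SUPPORTED_FEAT_TYPES") -> "np.ndarray":
        """Transform the validated features into a numeric array.

        Args:
            X (SUPPORTED_FEAT_TYPES):
                A set of features whose categorical columns are encoded.

        Returns:
            np.ndarray:
                The transformed array.
        """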