automl
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
Lines changed: 283 additions & 55 deletions
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
Lines changed: 2 additions & 0 deletions
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
Lines changed: 2 additions & 1 deletion
diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py
Lines changed: 36 additions & 9 deletions
diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py
Lines changed: 3 additions & 2 deletions
@@ -447,6 +447,8 @@ def search(
             dataset_compression=self._dataset_compression,
             feat_types=feat_types)
 
+        if self.dataset is None:
+            raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
 
@@ -79,7 +79,6 @@ class TabularRegressionTask(BaseTask):
             Search space updates that can be used to modify the search
             space of particular components or choice modules of the pipeline
     """
-
     def __init__(
         self,
         seed: int = 1,
@@ -448,6 +447,8 @@ def search(
             dataset_compression=self._dataset_compression,
             feat_types=feat_types)
 
+        if self.dataset is None:
+            raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
 
@@ -1,5 +1,5 @@
 import logging
-from typing import List, Optional, Union
+from typing import List, Optional, Set, Tuple, Union
 
 import numpy as np
 
@@ -24,24 +24,21 @@ class BaseFeatureValidator(BaseEstimator):
             List of the column types found by this estimator during fit.
         data_type (str):
             Class name of the data type provided during fit.
-        column_transformer (Optional[BaseEstimator])
+        encoder (Optional[BaseEstimator])
             Host a encoder object if the data requires transformation (for example,
-            if provided a categorical column in a pandas DataFrame)
-        transformed_columns (List[str])
-            List of columns that were encoded.
+            if provided a categorical column in a pandas DataFrame).
     """
     def __init__(
         self,
         logger: Optional[Union[PicklableClientLogger, logging.Logger]] = None,
-    ):
+    ) -> None:
         # Register types to detect unsupported data format changes
         self.feat_types: Optional[List[str]] = None
         self.data_type: Optional[type] = None
         self.dtypes: List[str] = []
         self.column_order: List[str] = []
 
         self.column_transformer: Optional[BaseEstimator] = None
-        self.transformed_columns: List[str] = []
 
         self.logger: Union[
             PicklableClientLogger, logging.Logger
@@ -53,6 +50,8 @@ def __init__(
         self.categorical_columns: List[int] = []
         self.numerical_columns: List[int] = []
 
+        self.all_nan_columns: Optional[Set[Union[int, str]]] = None
+
         self._is_fitted = False
 
     def fit(
@@ -75,7 +74,7 @@ def fit(
 
         # If a list was provided, it will be converted to pandas
         if isinstance(X_train, list):
-            X_train, X_test = self.list_to_dataframe(X_train, X_test)
+            X_train, X_test = self.list_to_pandas(X_train, X_test)
 
         self._check_data(X_train)
 
@@ -109,6 +108,7 @@ def _fit(
             self:
                 The fitted base estimator
         """
+
         raise NotImplementedError()
 
     def _check_data(
@@ -118,11 +118,12 @@ def _check_data(
         """
         Feature dimensionality and data type checks
 
-        Arguments:
+        Args:
             X (SUPPORTED_FEAT_TYPES):
                 A set of features that are going to be validated (type and dimensionality
                 checks) and a encoder fitted in the case the data needs encoding
         """
+
         raise NotImplementedError()
 
     def transform(
@@ -139,4 +140,30 @@ def transform(
             np.ndarray:
                 The transformed array
         """
+
+        raise NotImplementedError()
+
+    def list_to_pandas(
+        self,
+        X_train: SUPPORTED_FEAT_TYPES,
+        X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
+    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
+        """
+        Converts a list to a pandas DataFrame. In this process, column types are inferred.
+
+        If test data is provided, we proactively match it to train data.
+
+        Args:
+            X_train (SUPPORTED_FEAT_TYPES):
+                A set of features that are going to be validated (type and dimensionality
+                checks) and an encoder fitted in the case the data needs encoding
+            X_test (Optional[SUPPORTED_FEAT_TYPES]):
+                A hold out set of data used for checking
+        Returns:
+            pd.DataFrame:
+                transformed train data from list to pandas DataFrame
+            Optional[pd.DataFrame]:
+                transformed test data from list to pandas DataFrame
+        """
+
         raise NotImplementedError()
@@ -36,7 +36,7 @@ def __init__(self,
                                         logging.Logger
                                         ]
                                   ] = None,
-                 ):
+                 ) -> None:
         self.is_classification = is_classification
 
         self.data_type: Optional[type] = None
@@ -86,6 +86,7 @@ def fit(
                                      np.shape(y_test)
                                  ))
             if isinstance(y_train, pd.DataFrame):
+                y_train = cast(pd.DataFrame, y_train)
                 y_test = cast(pd.DataFrame, y_test)
                 if y_train.columns.tolist() != y_test.columns.tolist():
                     raise ValueError(
@@ -131,7 +132,7 @@ def _fit(
 
     def transform(
         self,
-        y: Union[SupportedTargetTypes],
+        y: SupportedTargetTypes,
     ) -> np.ndarray:
         """
         Args: