
Commit a044a19

Merge branch 'refactor_development_regularization_cocktails' into cocktail_fixes
2 parents 1488978 + 42a7676 commit a044a19

25 files changed: +224 −150 lines

autoPyTorch/api/base_task.py

Lines changed: 4 additions & 4 deletions
@@ -254,7 +254,7 @@ def get_dataset(self,
                     NoResamplingStrategyTypes]] = None,
                 resampling_strategy_args: Optional[Dict[str, Any]] = None,
                 dataset_name: Optional[str] = None,
-                return_only: Optional[bool] = False
+                update_dataset_attribute: Optional[bool] = True
                 ) -> BaseDataset:
         raise NotImplementedError("Function called on BaseTask, this can only be called by "
                                   "specific task which is a child of the BaseTask")
@@ -276,7 +276,7 @@ def set_pipeline_config(
             None
         """
         unknown_keys = []
-        for option, value in pipeline_config_kwargs.items():
+        for option in pipeline_config_kwargs.keys():
             if option in self.pipeline_options.keys():
                 pass
             else:
@@ -587,7 +587,7 @@ def _do_dummy_prediction(self) -> None:
             all_supported_metrics=self._all_supported_metrics
         )

-        status, cost, runtime, additional_info = ta.run(num_run, cutoff=self._time_for_task)
+        status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task)
         if status == StatusType.SUCCESS:
             self._logger.info("Finished creating dummy predictions.")
         else:
@@ -1263,7 +1263,7 @@ def fit_pipeline(self,
                              resampling_strategy=resampling_strategy,
                              resampling_strategy_args=resampling_strategy_args,
                              dataset_name=dataset_name,
-                             return_only=True)
+                             update_dataset_attribute=False)

         # TAE expects each configuration to have a config_id.
         # For fitting a pipeline as it is not part of the
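The `return_only` flag is renamed across the API, with its polarity inverted. A minimal usage sketch of the new flag, assuming the tabular classification task and the toy data below (both illustrative, not part of this commit):

import numpy as np
from autoPyTorch.api.tabular_classification import TabularClassificationTask

X_train = np.random.random((100, 4))
y_train = np.random.randint(0, 2, size=100)
api = TabularClassificationTask()

# Old: return_only=True  -> new: update_dataset_attribute=False
# builds and returns a dataset without touching the estimator's state
dataset = api.get_dataset(X_train=X_train, y_train=y_train,
                          update_dataset_attribute=False)

# Old: return_only=False -> new: update_dataset_attribute=True (the default)
# additionally stores api.dataset and api.input_validator as a side effect
dataset = api.get_dataset(X_train=X_train, y_train=y_train)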

autoPyTorch/api/tabular_classification.py

Lines changed: 19 additions & 17 deletions
@@ -136,7 +136,7 @@ def get_dataset(self,
                     NoResamplingStrategyTypes]] = None,
                 resampling_strategy_args: Optional[Dict[str, Any]] = None,
                 dataset_name: Optional[str] = None,
-                return_only: Optional[bool] = False
+                update_dataset_attribute: Optional[bool] = True
                 ) -> BaseDataset:

         if dataset_name is None:
@@ -148,27 +148,27 @@ def get_dataset(self,

         # Create a validator object to make sure that the data provided by
         # the user matches the autopytorch requirements
-        InputValidator = TabularInputValidator(
+        input_validator = TabularInputValidator(
             is_classification=True,
             logger_port=self._logger_port,
         )

         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        input_validator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

         dataset = TabularDataset(
             X=X_train, Y=y_train,
             X_test=X_test, Y_test=y_test,
-            validator=InputValidator,
+            validator=input_validator,
             resampling_strategy=resampling_strategy,
             resampling_strategy_args=resampling_strategy_args,
             dataset_name=dataset_name,
             seed=self.seed
         )
-        if not return_only:
-            self.InputValidator = InputValidator
+        if update_dataset_attribute:
+            self.input_validator = input_validator
             self.dataset = dataset

         return dataset
@@ -206,7 +206,7 @@ def search(
                 pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                 be provided to track the generalization performance of each stage.
             dataset_name (Optional[str]):
-                Name of the dayaset, if None, random value is used
+                Name of the dayaset, if None, time hashed value is used
             optimize_metric (str): name of the metric that is used to
                 evaluate a pipeline.
             budget_type (Optional[str]):
@@ -269,10 +269,12 @@ def search(

         """

-        assert isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)), \
-            "Val Split is required for HPO search. " \
-            "Expected 'self.resampling_strategy' in" \
-            " '(CrossValTypes, HoldoutValTypes) got {}".format(self.resampling_strategy)
+        if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
+            raise ValueError(
+                'Hyperparameter optimization requires a validation split. '
+                'Expected `self.resampling_strategy` to be either '
+                '(CrossValTypes, HoldoutValTypes), but got {}'.format(self.resampling_strategy)
+            )

         self.get_dataset(X_train=X_train,
                          y_train=y_train,
@@ -305,28 +307,28 @@ def predict(
                 batch_size: Optional[int] = None,
                 n_jobs: int = 1
                 ) -> np.ndarray:
-        if self.InputValidator is None or not self.InputValidator._is_fitted:
+        if self.input_validator is None or not self.input_validator._is_fitted:
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator fit() method.")

-        X_test = self.InputValidator.feature_validator.transform(X_test)
+        X_test = self.input_validator.feature_validator.transform(X_test)
         predicted_probabilities = super().predict(X_test, batch_size=batch_size,
                                                   n_jobs=n_jobs)

-        if self.InputValidator.target_validator.is_single_column_target():
+        if self.input_validator.target_validator.is_single_column_target():
             predicted_indexes = np.argmax(predicted_probabilities, axis=1)
         else:
             predicted_indexes = (predicted_probabilities > 0.5).astype(int)

         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
-        return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
+        return self.input_validator.target_validator.inverse_transform(predicted_indexes)

     def predict_proba(self,
                       X_test: Union[np.ndarray, pd.DataFrame, List],
                       batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
-        if self.InputValidator is None or not self.InputValidator._is_fitted:
+        if self.input_validator is None or not self.input_validator._is_fitted:
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator fit() method.")
-        X_test = self.InputValidator.feature_validator.transform(X_test)
+        X_test = self.input_validator.feature_validator.transform(X_test)
         return super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
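The rewritten predict() decodes probabilities differently depending on the target layout; a standalone numpy sketch of the two branches (toy probabilities, not from the commit):

import numpy as np

# Single-column target (multiclass): take the most probable class per row.
probs = np.array([[0.1, 0.7, 0.2],
                  [0.6, 0.3, 0.1]])
print(np.argmax(probs, axis=1))       # [1 0]

# Multi-column target (e.g. multilabel): threshold each column at 0.5.
probs = np.array([[0.9, 0.2, 0.6],
                  [0.1, 0.8, 0.4]])
print((probs > 0.5).astype(int))      # [[1 0 1], [0 1 0]]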

autoPyTorch/api/tabular_regression.py

Lines changed: 15 additions & 13 deletions
@@ -128,7 +128,7 @@ def get_dataset(self,
                     NoResamplingStrategyTypes]] = None,
                 resampling_strategy_args: Optional[Dict[str, Any]] = None,
                 dataset_name: Optional[str] = None,
-                return_only: Optional[bool] = False
+                update_dataset_attribute: Optional[bool] = True
                 ) -> BaseDataset:

         if dataset_name is None:
@@ -140,27 +140,27 @@ def get_dataset(self,

         # Create a validator object to make sure that the data provided by
         # the user matches the autopytorch requirements
-        InputValidator = TabularInputValidator(
+        input_validator = TabularInputValidator(
             is_classification=False,
             logger_port=self._logger_port,
         )

         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        input_validator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

         dataset = TabularDataset(
             X=X_train, Y=y_train,
             X_test=X_test, Y_test=y_test,
-            validator=InputValidator,
+            validator=input_validator,
             resampling_strategy=resampling_strategy,
             resampling_strategy_args=resampling_strategy_args,
             dataset_name=dataset_name,
             seed=self.seed
         )
-        if not return_only:
-            self.InputValidator = InputValidator
+        if update_dataset_attribute:
+            self.input_validator = input_validator
             self.dataset = dataset

         return dataset
@@ -255,10 +255,12 @@ def search(

         """

-        assert isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)), \
-            "Val Split is required for HPO search. " \
-            "Expected 'self.resampling_strategy' in" \
-            " '(CrossValTypes, HoldoutValTypes) got {}".format(self.resampling_strategy)
+        if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
+            raise ValueError(
+                'Hyperparameter optimization requires a validation split. '
+                'Expected `self.resampling_strategy` to be either '
+                '(CrossValTypes, HoldoutValTypes), but got {}'.format(self.resampling_strategy)
+            )

         self.get_dataset(X_train=X_train,
                          y_train=y_train,
@@ -291,14 +293,14 @@ def predict(
                 batch_size: Optional[int] = None,
                 n_jobs: int = 1
                 ) -> np.ndarray:
-        if self.InputValidator is None or not self.InputValidator._is_fitted:
+        if self.input_validator is None or not self.input_validator._is_fitted:
             raise ValueError("predict() is only supported after calling search. Kindly call first "
                              "the estimator fit() method.")

-        X_test = self.InputValidator.feature_validator.transform(X_test)
+        X_test = self.input_validator.feature_validator.transform(X_test)
         predicted_values = super().predict(X_test, batch_size=batch_size,
                                            n_jobs=n_jobs)

         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
-        return self.InputValidator.target_validator.inverse_transform(predicted_values)
+        return self.input_validator.target_validator.inverse_transform(predicted_values)
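Both search() methods (and the evaluators further down) replace assert with an explicit raise. A minimal sketch of why this matters: assert statements are stripped when Python runs with -O, while a raised ValueError always fires. The helpers below are hypothetical, not part of the commit:

def check_with_assert(ok: bool) -> None:
    assert ok, "silently skipped under `python -O`"

def check_with_raise(ok: bool) -> None:
    if not ok:
        raise ValueError("raised regardless of interpreter flags")

check_with_raise(True)     # passes
# check_with_raise(False)  # -> ValueError, even under `python -O`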

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 1 addition & 1 deletion
@@ -492,7 +492,7 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
                 X[key] = X[key].astype(dtype.name)
             except Exception as e:
                 # Try inference if possible
-                self.logger.warning(f"Tried to cast column {key} to {dtype} caused {e}")
+                self.logger.warning(f'Casting the column {key} to {dtype} caused the exception {e}')
                 pass
         else:
             # Calling for the first time to infer the categories
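For reference, a self-contained reproduction of the cast-and-warn pattern the reworded message belongs to (toy column and dtype, standard logging in place of the class logger):

import logging
import pandas as pd

logger = logging.getLogger(__name__)
X = pd.DataFrame({'col': ['1', '2', 'not-a-number']})

try:
    X['col'] = X['col'].astype('int64')
except Exception as e:
    # Mirrors the new message: name the operation, then the exception.
    logger.warning(f"Casting the column col to int64 caused the exception {e}")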

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 3 additions & 2 deletions
@@ -368,6 +368,8 @@ def __init__(self, backend: Backend,
         self.additional_metrics = get_metrics(dataset_properties=self.dataset_properties,
                                               all_supported_metrics=all_supported_metrics)

+        # See autoPyTorch/pipeline/components/base_component.py::autoPyTorchComponent for more details
+        # about fit_dictionary
         self.fit_dictionary: Dict[str, Any] = {'dataset_properties': self.dataset_properties}
         self._init_params = init_params
         self.fit_dictionary.update({
@@ -380,8 +382,7 @@ def __init__(self, backend: Backend,
         })

         # Update fit dictionary with metrics passed to the evaluator
-        metrics_dict: Dict[str, List[str]] = {'additional_metrics': []}
-        metrics_dict['additional_metrics'].append(self.metric.name)
+        metrics_dict: Dict[str, List[str]] = {'additional_metrics': [self.metric.name]}
         if all_supported_metrics:
             assert self.additional_metrics is not None
             for metric in self.additional_metrics:
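The metrics_dict change folds a two-step build into a list literal; the two forms are equivalent:

metric_name = 'accuracy'  # stand-in for self.metric.name

# Before: empty list, then append the primary metric.
metrics_dict = {'additional_metrics': []}
metrics_dict['additional_metrics'].append(metric_name)

# After: the same dictionary in one expression.
metrics_dict = {'additional_metrics': [metric_name]}
assert metrics_dict == {'additional_metrics': ['accuracy']}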

autoPyTorch/evaluation/fit_evaluator.py

Lines changed: 6 additions & 4 deletions
@@ -58,10 +58,12 @@ def __init__(self, backend: Backend, queue: Queue,
                          pipeline_config=pipeline_config,
                          search_space_updates=search_space_updates
                          )
-        assert isinstance(self.datamanager.resampling_strategy, NoResamplingStrategyTypes),\
-            "This Evaluator is used for fitting a pipeline on the whole dataset. " \
-            "Expected 'self.resampling_strategy' to be" \
-            " 'NoResamplingStrategyTypes' got {}".format(self.datamanager.resampling_strategy)
+        if not isinstance(self.datamanager.resampling_strategy, NoResamplingStrategyTypes):
+            raise ValueError(
+                "FitEvaluator needs to be fitted on the whole dataset and resampling_strategy "
+                "must be `NoResamplingStrategyTypes`, but got {}".format(
+                    self.datamanager.resampling_strategy
+                ))

         self.splits = self.datamanager.splits
         self.Y_target: Optional[np.ndarray] = None
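The same guard, pulled out as a standalone function for clarity (a hypothetical helper, not part of the commit; the import path is the one this branch appears to use). FitEvaluator trains on the full dataset, so any holdout/CV strategy indicates a caller error:

from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes

def check_fit_evaluator_strategy(resampling_strategy) -> None:
    if not isinstance(resampling_strategy, NoResamplingStrategyTypes):
        raise ValueError(
            "FitEvaluator needs to be fitted on the whole dataset and "
            "resampling_strategy must be `NoResamplingStrategyTypes`, "
            "but got {}".format(resampling_strategy)
        )

check_fit_evaluator_strategy("holdout")  # -> ValueError: not a NoResamplingStrategyTypes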

autoPyTorch/evaluation/tae.py

Lines changed: 4 additions & 4 deletions
@@ -174,10 +174,9 @@ def __init__(
         elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes):
             eval_function = autoPyTorch.evaluation.fit_evaluator.eval_function
         else:
-            raise ValueError("Unknown resampling strategy specified."
-                             "Expected resampling strategy to be in "
-                             "'(HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes)"
-                             "got {}".format(self.resampling_strategy))
+            raise ValueError("resampling strategy must be in "
+                             "(HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes), "
+                             "but got {}.".format(self.resampling_strategy))

         self.worst_possible_result = cost_for_crash

@@ -319,6 +318,7 @@ def run(
         info: typing.Optional[typing.List[RunValue]]
         additional_run_info: typing.Dict[str, typing.Any]
         try:
+            # By default, self.ta is fit_predict_try_except_decorator
             obj = pynisher.enforce_limits(**pynisher_arguments)(self.ta)
             obj(**obj_kwargs)
         except Exception as e:
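For context, a minimal sketch of the pynisher wrapping used above, against the pre-1.0 pynisher API this codebase depends on (the limits shown are illustrative; the TAE assembles pynisher_arguments from its own configuration):

import pynisher

def target_function(x: int) -> int:
    return 2 * x

# enforce_limits returns a wrapper that runs the target in a subprocess and
# aborts it if the wall-clock or memory limit is exceeded.
safe_target = pynisher.enforce_limits(wall_time_in_s=30, mem_in_mb=1024)(target_function)
result = safe_target(21)  # 42 on success, None if a limit was hit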

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 8 additions & 5 deletions
@@ -71,11 +71,12 @@ def __init__(self, backend: Backend, queue: Queue,
                          pipeline_config=pipeline_config,
                          search_space_updates=search_space_updates
                          )
-        assert isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)),\
-            "This Evaluator is used for HPO Search. " \
-            "Val Split is required for HPO search. " \
-            "Expected 'self.resampling_strategy' in" \
-            " '(CrossValTypes, HoldoutValTypes)' got {}".format(self.datamanager.resampling_strategy)
+
+        if not isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
+            raise ValueError(
+                'TrainEvaluator expect to have (CrossValTypes, HoldoutValTypes) as '
+                'resampling_strategy, but got {}'.format(self.datamanager.resampling_strategy)
+            )

         self.splits = self.datamanager.splits
         if self.splits is None:
@@ -271,6 +272,8 @@ def _fit_and_predict(self, pipeline: BaseEstimator, fold: int, train_indices: Un

         self.indices[fold] = ((train_indices, test_indices))

+        # See autoPyTorch/pipeline/components/base_component.py::autoPyTorchComponent for more details
+        # about fit_dictionary
         X = {'train_indices': train_indices,
              'val_indices': test_indices,
              'split_id': fold,
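A trimmed illustration of the fit dictionary the new comments point to: a plain dict that pipeline components read their inputs from. The keys shown are the ones visible in this hunk plus dataset_properties from abstract_evaluator.py above; anything beyond that is an assumption:

X = {
    'train_indices': [0, 1, 2, 3],  # toy fold indices
    'val_indices': [4, 5],
    'split_id': 0,
    'dataset_properties': {'task_type': 'tabular_classification'},
}
train_idx = X['train_indices']  # components look up their inputs by key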

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py

Lines changed: 4 additions & 0 deletions
@@ -12,8 +12,12 @@ def get_tabular_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator
     Creates a dictionary with two keys,
     numerical- containing list of numerical preprocessors
     categorical- containing list of categorical preprocessors
+
     Args:
         X: fit dictionary
+            See autoPyTorch/pipeline/components/base_component.py::autoPyTorchComponent for more details
+            about fit_dictionary
+
     Returns:
         (Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors
     """

autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py

Lines changed: 10 additions & 10 deletions
@@ -31,11 +31,11 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:

         # use the get_shaped_neuron_counts to update the number of units
         neuron_counts = get_shaped_neuron_counts(
-            self.config['resnet_shape'],
-            in_features,
-            out_features,
-            self.config['max_units'],
-            self.config['num_groups'] + 2,
+            shape=self.config['resnet_shape'],
+            in_feat=in_features,
+            out_feat=out_features,
+            max_neurons=self.config['max_units'],
+            layer_count=self.config['num_groups'] + 2,
         )[:-1]
         self.config.update(
             {"num_units_%d" % (i): num for i, num in enumerate(neuron_counts)}
@@ -46,11 +46,11 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
         # nr of units for the architecture, since, it is mostly implemented for the
         # output layer, which is part of the head and not of the backbone.
         dropout_shape = get_shaped_neuron_counts(
-            self.config['dropout_shape'],
-            0,
-            0,
-            self.config["max_dropout"],
-            self.config['num_groups'] + 1,
+            shape=self.config['dropout_shape'],
+            in_feat=0,
+            out_feat=0,
+            max_neurons=self.config["max_dropout"],
+            layer_count=self.config['num_groups'] + 1,
         )[:-1]

         self.config.update(
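Switching to keyword arguments makes these call sites self-documenting and immune to parameter reordering; a generic sketch of the failure mode being avoided (the stub below only borrows the keyword names from the diff):

def get_counts(shape, in_feat, out_feat, max_neurons, layer_count):
    # stub with the same parameter names as get_shaped_neuron_counts
    return [max_neurons] * layer_count

# Positional call: breaks silently if the parameter order ever changes.
get_counts('funnel', 10, 2, 128, 5)

# Keyword call, as in the diff: explicit and order-independent.
get_counts(shape='funnel', in_feat=10, out_feat=2, max_neurons=128, layer_count=5)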

autoPyTorch/pipeline/components/setup/network_backbone/utils.py

Lines changed: 19 additions & 0 deletions
@@ -33,6 +33,13 @@ def get_output_shape(network: torch.nn.Module, input_shape: typing.Tuple[int, ..


 class ShakeShakeFunction(Function):
+    """
+    References:
+        Title: Shake-Shake regularization
+        Authors: Xavier Gastaldi
+        URL: https://arxiv.org/pdf/1705.07485.pdf
+        Github URL: https://github.com/hysts/pytorch_shake_shake/blob/master/functions/shake_shake_function.py
+    """
     @staticmethod
     def forward(
         ctx: typing.Any,  # No typing for AutogradContext
@@ -65,6 +72,18 @@ def backward(ctx: typing.Any,


 class ShakeDropFunction(Function):
+    """
+    References:
+        Title: ShakeDrop Regularization for Deep Residual Learning
+        Authors: Yoshihiro Yamada et. al.
+        URL: https://arxiv.org/pdf/1802.02375.pdf
+
+        Title: ShakeDrop Regularization
+        Authors: Yoshihiro Yamada et. al.
+        URL: https://openreview.net/pdf?id=S1NHaMW0b
+
+        Github URL: https://github.com/owruby/shake-drop_pytorch/blob/master/models/shakedrop.py
+    """
     @staticmethod
     def forward(ctx: typing.Any,
                 x: torch.Tensor,
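The newly referenced papers mix residual branches with random weights at training time; a minimal standalone sketch of the shake-shake forward idea (illustrative only, not the autograd Function defined in this file):

import torch

branch1 = torch.randn(8, 16)  # output of residual branch 1
branch2 = torch.randn(8, 16)  # output of residual branch 2
alpha = torch.rand(8, 1)      # fresh random convex weight per sample
out = alpha * branch1 + (1.0 - alpha) * branch2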
