Skip to content

Commit 9cdfb64

Browse files
committed
Fix flake and mypy errors
1 parent ed48dab commit 9cdfb64

File tree

13 files changed: 72 additions (+72) and 72 deletions (-72)

autoPyTorch/api/base_task.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1348,7 +1348,7 @@ def fit_ensemble(
13481348
ensemble_size: int = 50,
13491349
load_models: bool = True,
13501350
time_for_task: int = 100,
1351-
func_eval_time_limit_secs: Optional[int] = None,
1351+
func_eval_time_limit_secs: int = 50,
13521352
enable_traditional_pipeline: bool = True,
13531353
) -> 'BaseTask':
13541354
"""

autoPyTorch/api/tabular_classification.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,7 @@ def search(
275275
y_test=y_test,
276276
dataset_name=dataset_name)
277277

278+
assert self.dataset is not None, "Something went wrong, expected dataset to be initialised"
278279
return self._search(
279280
dataset=self.dataset,
280281
optimize_metric=optimize_metric,

autoPyTorch/api/tabular_regression.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,7 @@ def search(
261261
y_test=y_test,
262262
dataset_name=dataset_name)
263263

264+
assert self.dataset is not None, "Something went wrong, expected dataset to be initialised"
264265
return self._search(
265266
dataset=self.dataset,
266267
optimize_metric=optimize_metric,

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 21 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -9,35 +9,32 @@
99
import scipy.sparse
1010

1111
import sklearn.utils
12-
1312
from sklearn.base import BaseEstimator
1413
from sklearn.compose import ColumnTransformer
1514
from sklearn.exceptions import NotFittedError
16-
from sklearn.pipeline import make_pipeline
17-
from sklearn.preprocessing import OneHotEncoder
1815
from sklearn.impute import SimpleImputer
19-
from sklearn.preprocessing import StandardScaler
16+
from sklearn.pipeline import make_pipeline
17+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
2018

2119
from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES
2220

2321

2422
def _create_column_transformer(
25-
preprocessors: Dict[str, List[BaseEstimator]],
26-
numerical_columns: List[str],
27-
categorical_columns: List[str]
23+
preprocessors: Dict[str, List[BaseEstimator]],
24+
numerical_columns: List[str],
25+
categorical_columns: List[str]
2826
) -> ColumnTransformer:
2927
"""
30-
Given a dictionary of preprocessors, this function
31-
creates a sklearn column transformer with appropriate
32-
columns associated with their preprocessors.
28+
Given a dictionary of preprocessors, this function
29+
creates a sklearn column transformer with appropriate
30+
columns associated with their preprocessors.
3331
Args:
34-
preprocessors (Dict[str, List]):
32+
preprocessors (Dict[str, List]):
3533
Dictionary containing list of numerical and categorical preprocessors.
3634
numerical_columns (List[int]):
3735
List of names of numerical columns
3836
categorical_columns (List[int]):
3937
List of names of categorical columns
40-
4138
Returns:
4239
ColumnTransformer
4340
"""
@@ -57,12 +54,12 @@ def _create_column_transformer(
5754

5855
def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]:
5956
"""
60-
This function creates a Dictionary containing list
57+
This function creates a Dictionary containing list
6158
of numerical and categorical preprocessors
6259
Returns:
63-
60+
Dict[str, List[BaseEstimator]]
6461
"""
65-
preprocessors = dict()
62+
preprocessors: Dict[str, List[BaseEstimator]] = dict()
6663
preprocessors['numerical'] = list()
6764
preprocessors['categorical'] = list()
6865

@@ -144,12 +141,12 @@ def comparator(cmp1: str, cmp2: str) -> int:
144141
)
145142

146143
if len(categorical_columns) > 0:
147-
print(self.column_transformer.named_transformers_['categorical_pipeline'].named_steps)
148144
self.categories = [
149145
# We fit an ordinal encoder, where all categorical
150146
# columns are shifted to the left
151147
list(range(len(cat)))
152-
for cat in self.column_transformer.named_transformers_['categorical_pipeline'].named_steps['onehotencoder'].categories_
148+
for cat in self.column_transformer.named_transformers_[
149+
'categorical_pipeline'].named_steps['onehotencoder'].categories_
153150
]
154151

155152
for i, type_ in enumerate(self.feat_type):
@@ -284,7 +281,7 @@ def _check_data(
284281
raise ValueError("Changing the column order of the features after fit() is "
285282
"not supported. Fit() method was called with "
286283
"{} whereas the new features have {} as type".format(self.column_order,
287-
column_order,)
284+
column_order, )
288285
)
289286
else:
290287
self.column_order = column_order
@@ -411,7 +408,7 @@ def list_to_dataframe(
411408

412409
@staticmethod
413410
def numpy_array_to_pandas(
414-
X: np.ndarray,
411+
X: np.ndarray,
415412
) -> pd.DataFrame:
416413
"""
417414
Converts a numpy array to pandas for type inference
@@ -457,7 +454,9 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
457454
self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}")
458455
return X
459456

460-
def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) -> pd.DataFrame:
457+
def impute_nan_in_categories(self,
458+
X: pd.DataFrame
459+
) -> pd.DataFrame:
461460
"""
462461
impute missing values before encoding,
463462
remove once sklearn natively supports
@@ -489,8 +488,7 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) ->
489488
if can_cast_as_number:
490489
# In this case, we expect to have a number as category
491490
# it might be string, but its value represent a number
492-
missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0],
493-
str) else -1
491+
missing_value: Union[str, int] = '-1' if isinstance(X[column].dropna().values[0], str) else -1
494492
else:
495493
missing_value = 'Missing!'
496494

@@ -509,4 +507,4 @@ def impute_nan_in_categories(self, X: pd.DataFrame, categorical_columns=None) ->
509507
X[column].cat.add_categories([self.dict_missing_value_per_col[column]],
510508
inplace=True)
511509
X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True)
512-
return X
510+
return X

autoPyTorch/datasets/base_dataset.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,13 +330,19 @@ def get_dataset_for_training(self, split_id: int, train: bool, subset: int = 0)
330330
to provide training data to fit a pipeline
331331
332332
Args:
333-
split (int): The desired subset of the dataset to split and use
333+
split_id (int): which split id to get from the splits
334+
train (bool): whether the train or valid transforms are to be applied
335+
subset (int, default=0): 0 is for train_indices, 1 is for valid_indices
334336
335337
Returns:
338+
336339
Dataset: the reduced dataset to be used for testing
337340
"""
338341
# Subset creates a dataset. Splits is a (train_indices, test_indices) tuple
339-
return TransformSubset(self, self.splits[split_id][subset], train=train)
342+
assert split_id <= len(self.splits), "Expected split id to be less than length of splits"
343+
indices = self.splits[split_id][subset]
344+
assert indices is not None, "Trying to get subset when it does not exist"
345+
return TransformSubset(self, indices, train=train)
340346

341347
def replace_data(self, X_train: BaseDatasetInputType,
342348
X_test: Optional[BaseDatasetInputType]) -> 'BaseDataset':

autoPyTorch/pipeline/base_pipeline.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -451,13 +451,14 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
451451
continue
452452
raise ValueError("Unknown hyperparameter for component {}. "
453453
"Expected update hyperparameter "
454-
"to be in {} got {}. choice is {}".format(node.__class__.__name__,
455-
component.
456-
get_hyperparameter_search_space(
457-
dataset_properties=self.dataset_properties).
458-
get_hyperparameter_names(),
459-
split_hyperparameter[1],
460-
component.__name__))
454+
"to be in {} got {}."
455+
" component is {}".format(node.__class__.__name__,
456+
component.get_hyperparameter_search_space(
457+
dataset_properties=self.dataset_properties
458+
).get_hyperparameter_names(),
459+
split_hyperparameter[1],
460+
component.__name__)
461+
)
461462
else:
462463
if update.hyperparameter not in node.get_hyperparameter_search_space(
463464
dataset_properties=self.dataset_properties):

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import numpy as np
44

55
from sklearn.compose import ColumnTransformer
6-
from sklearn.pipeline import make_pipeline
7-
import time
6+
# from sklearn.pipeline import make_pipeline
7+
88
import torch
99

1010
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
1111
autoPyTorchTabularPreprocessingComponent
1212
)
13-
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
13+
# from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
1414
from autoPyTorch.utils.common import FitRequirement, subsampler
1515

1616

autoPyTorch/pipeline/components/setup/network_backbone/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def shake_get_alpha_beta(
117117
beta = torch.FloatTensor([0.5])
118118
elif method == 'M3':
119119
beta = torch.FloatTensor(
120-
[torch.rand(1)*(0.5 - alpha)*alpha if alpha < 0.5 else torch.rand(1)*(alpha - 0.5)*alpha]
120+
[torch.rand(1) * (0.5 - alpha) * alpha if alpha < 0.5 else torch.rand(1) * (alpha - 0.5) * alpha]
121121
)
122122
else:
123123
raise ValueError("Unknown method for ShakeShakeRegularisation in NetworkBackbone")

autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import copy
2-
from typing import Any, Dict, Optional, Tuple
1+
# import copy
2+
from typing import Any, Dict, Optional # , Tuple
33

44
import numpy as np
55

@@ -30,23 +30,23 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
3030

3131
def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module:
3232
raise NotImplementedError
33-
34-
def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]:
35-
# Feature preprocessors can alter numerical columns
36-
# if len(X['dataset_properties']['numerical_columns']) == 0:
37-
# num_numerical_columns = 0
38-
# else:
39-
# X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2])
40-
#
41-
# numerical_column_transformer = X['tabular_transformer'].preprocessor. \
42-
# named_transformers_['numerical_pipeline']
43-
# num_numerical_columns = numerical_column_transformer.transform(
44-
# X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
45-
# num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])),
46-
# dtype=int)
47-
# categories = X['dataset_properties']['categories']
48-
#
49-
# for i, category in enumerate(categories):
50-
# num_input_features[num_numerical_columns + i, ] = len(category)
51-
# return num_numerical_columns, num_input_features
52-
return None, None
33+
#
34+
# def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]:
35+
# # Feature preprocessors can alter numerical columns
36+
# # if len(X['dataset_properties']['numerical_columns']) == 0:
37+
# # num_numerical_columns = 0
38+
# # else:
39+
# # X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2])
40+
# #
41+
# # numerical_column_transformer = X['tabular_transformer'].preprocessor. \
42+
# # named_transformers_['numerical_pipeline']
43+
# # num_numerical_columns = numerical_column_transformer.transform(
44+
# # X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
45+
# # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])),
46+
# # dtype=int)
47+
# # categories = X['dataset_properties']['categories']
48+
# #
49+
# # for i, category in enumerate(categories):
50+
# # num_input_features[num_numerical_columns + i, ] = len(category)
51+
# # return num_numerical_columns, num_input_features
52+
# return None, None

autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
import torch
5+
# import torch
66

77
from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent
88
from autoPyTorch.pipeline.components.training.trainer.cutout_utils import CutOut
@@ -40,14 +40,15 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
4040
indices = self.random_state.choice(range(size), max(1, np.int32(size * self.patch_ratio)),
4141
replace=False)
4242

43-
"""if not isinstance(self.numerical_columns, typing.Iterable):
43+
"""
44+
if not isinstance(self.numerical_columns, typing.Iterable):
4445
raise ValueError("{} requires numerical columns information of {}"
4546
"to prepare data got {}.".format(self.__class__.__name__,
4647
typing.Iterable,
4748
self.numerical_columns))
4849
numerical_indices = torch.tensor(self.numerical_columns)
4950
categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns])
50-
51+
5152
# We use an ordinal encoder on the categorical columns of tabular data
5253
# -1 is the conceptual equivalent to 0 in a image, that does not
5354
# have color as a feature and hence the network has to learn to deal

autoPyTorch/pipeline/components/training/trainer/base_trainer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,6 @@ def __init__(self, weighted_loss: int = 0,
221221
self.add_fit_requirements([
222222
FitRequirement("is_cyclic_scheduler", (bool,), user_defined=False, dataset_property=False),
223223
])
224-
self.batch_fit_times = []
225-
self.data_loading_times = []
226224

227225
def prepare(
228226
self,

autoPyTorch/pipeline/tabular_classification.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99

10-
import sklearn.preprocessing
1110
from sklearn.base import ClassifierMixin
1211

1312
import torch
@@ -91,7 +90,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray:
9190
loader = self.named_steps['data_loader'].get_loader(X=X)
9291
pred = self.named_steps['network'].predict(loader)
9392
if isinstance(self.dataset_properties['output_shape'], int):
94-
return pred
93+
return pred
9594

9695
else:
9796
all_proba = []
@@ -140,11 +139,6 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.n
140139
pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None)
141140
y[batch_from:batch_to] = pred_prob.astype(np.float32)
142141

143-
# Neural networks might not be fit to produce a [0-1] output
144-
# For instance, after small number of epochs.
145-
# y = np.clip(y, 0, 1)
146-
# y = sklearn.preprocessing.normalize(y, axis=1, norm='l1')
147-
148142
return y
149143

150144
def _get_hyperparameter_search_space(self,

autoPyTorch/utils/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def load_datamanager(self) -> BaseDataset:
328328
with open(filepath, 'rb') as fh:
329329
return pickle.load(fh)
330330

331-
def replace_datamanager(self, datamanager: BaseDataset):
331+
def replace_datamanager(self, datamanager: BaseDataset) -> None:
332332
warnings.warn("Original dataset will be overwritten with the provided dataset")
333333
os.remove(self._get_datamanager_pickle_filename())
334334
self.save_datamanager(datamanager=datamanager)

0 commit comments

Comments
 (0)