Skip to content

Commit 4493270

Browse files
authored
Reduce run time of the test (#205)
* In progress, changing tests * Reduce time for tests * Fix flake in tests * Patch train in other tests also * Address comments from shuhei and francisco: * Move base training to pytest * Fix flake in tests * forgot to pass n_samples * stupid error * Address comments from shuhei, remove hardcoding and fix bug in dummy eval function * Skip ensemble test for python >=3.7 and introduce random state for feature processors * fix flake * Remove example workflow * Remove from __init__ in feature preprocessing
1 parent ee07c7e commit 4493270

File tree

18 files changed

+302
-176
lines changed

18 files changed

+302
-176
lines changed

.github/workflows/examples.yml

Lines changed: 0 additions & 39 deletions
This file was deleted.

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from math import ceil, floor
2-
from typing import Any, Dict, Optional, Union
2+
from typing import Any, Dict, Optional
33

44
from ConfigSpace.conditions import EqualsCondition, InCondition
55
from ConfigSpace.configuration_space import ConfigurationSpace
@@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
2323
def __init__(self, n_components: int = 10,
2424
kernel: str = 'rbf', degree: int = 3,
2525
gamma: float = 0.01, coef0: float = 0.0,
26-
random_state: Optional[Union[int, np.random.RandomState]] = None
27-
) -> None:
26+
random_state: Optional[np.random.RandomState] = None
27+
):
2828
self.n_components = n_components
2929
self.kernel = kernel
3030
self.degree = degree
3131
self.gamma = gamma
3232
self.coef0 = coef0
33-
self.random_state = random_state
34-
super().__init__()
33+
super().__init__(random_state=random_state)
3534

3635
self.add_fit_requirements([
3736
FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from math import ceil, floor
2-
from typing import Any, Dict, Optional, Union
2+
from typing import Any, Dict, Optional
33

44
from ConfigSpace.conditions import EqualsCondition, InCondition
55
from ConfigSpace.configuration_space import ConfigurationSpace
@@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent):
2323
def __init__(self, n_components: int = 10,
2424
kernel: str = 'rbf', degree: int = 3,
2525
gamma: float = 0.01, coef0: float = 0.0,
26-
random_state: Optional[Union[int, np.random.RandomState]] = None
27-
) -> None:
26+
random_state: Optional[np.random.RandomState] = None
27+
):
2828
self.n_components = n_components
2929
self.kernel = kernel
3030
self.degree = degree
3131
self.gamma = gamma
3232
self.coef0 = coef0
33-
self.random_state = random_state
34-
super().__init__()
33+
super().__init__(random_state=random_state)
3534

3635
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
3736

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Dict, Optional, Union
1+
from typing import Any, Dict, Optional
22

33
from ConfigSpace.configuration_space import ConfigurationSpace
44
from ConfigSpace.hyperparameters import (
@@ -19,13 +19,12 @@
1919
class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent):
2020
def __init__(self, degree: int = 2, interaction_only: bool = False,
2121
include_bias: bool = False,
22-
random_state: Optional[Union[int, np.random.RandomState]] = None):
22+
random_state: Optional[np.random.RandomState] = None):
2323
self.degree = degree
2424
self.interaction_only = interaction_only
2525
self.include_bias = include_bias
2626

27-
self.random_state = random_state
28-
super().__init__()
27+
super().__init__(random_state=random_state)
2928

3029
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
3130
self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures(

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Dict, Optional, Union
1+
from typing import Any, Dict, Optional
22

33
from ConfigSpace.configuration_space import ConfigurationSpace
44
from ConfigSpace.hyperparameters import (
@@ -17,11 +17,10 @@
1717

1818
class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
1919
def __init__(self, standardize: bool = True,
20-
random_state: Optional[Union[int, np.random.RandomState]] = None):
20+
random_state: Optional[np.random.RandomState] = None):
2121
self.standardize = standardize
2222

23-
self.random_state = random_state
24-
super().__init__()
23+
super().__init__(random_state=random_state)
2524

2625
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
2726
self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from math import ceil, floor
2-
from typing import Any, Dict, Optional, Union
2+
from typing import Any, Dict, Optional
33

44
from ConfigSpace.configuration_space import ConfigurationSpace
55
from ConfigSpace.hyperparameters import (
@@ -20,12 +20,11 @@
2020
class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
2121
def __init__(self, n_components: int = 100,
2222
gamma: float = 1.0,
23-
random_state: Optional[Union[int, np.random.RandomState]] = None
24-
) -> None:
23+
random_state: Optional[np.random.RandomState] = None
24+
):
2525
self.n_components = n_components
2626
self.gamma = gamma
27-
self.random_state = random_state
28-
super().__init__()
27+
super().__init__(random_state=random_state)
2928

3029
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
3130

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from math import floor
2-
from typing import Any, Dict, Optional, Union
2+
from typing import Any, Dict, Optional
33

44
from ConfigSpace.configuration_space import ConfigurationSpace
55
from ConfigSpace.hyperparameters import (
@@ -18,11 +18,10 @@
1818

1919
class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
2020
def __init__(self, target_dim: int = 128,
21-
random_state: Optional[Union[int, np.random.RandomState]] = None):
21+
random_state: Optional[np.random.RandomState] = None):
2222
self.target_dim = target_dim
2323

24-
self.random_state = random_state
25-
super().__init__()
24+
super().__init__(random_state=random_state)
2625

2726
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
2827

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
from typing import Any, Dict, List
1+
from typing import Any, Dict, List, Optional
2+
3+
import numpy as np
4+
5+
from sklearn.utils import check_random_state
26

37
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
48
autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@
812
class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
913
_required_properties: List[str] = ['handles_sparse']
1014

11-
def __init__(self) -> None:
15+
def __init__(self, random_state: Optional[np.random.RandomState] = None):
16+
if random_state is None:
17+
# Trainer components need a random state for
18+
# sampling -- for example in MixUp training
19+
self.random_state = check_random_state(1)
20+
else:
21+
self.random_state = random_state
1222
super().__init__()
1323

1424
def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:

test/conftest.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
from autoPyTorch.utils.pipeline import get_dataset_requirements
2626

2727

28+
N_SAMPLES = 200
29+
30+
2831
@pytest.fixture(scope="session")
2932
def callattr_ahead_of_alltests(request):
3033
"""
@@ -191,7 +194,7 @@ def session_run_at_end():
191194
def get_tabular_data(task):
192195
if task == "classification_numerical_only":
193196
X, y = make_classification(
194-
n_samples=200,
197+
n_samples=N_SAMPLES,
195198
n_features=4,
196199
n_informative=3,
197200
n_redundant=1,
@@ -207,18 +210,18 @@ def get_tabular_data(task):
207210
X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
208211
categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
209212
X = X[categorical_columns]
210-
X = X.iloc[0:200]
211-
y = y.iloc[0:200]
213+
X = X.iloc[0:N_SAMPLES]
214+
y = y.iloc[0:N_SAMPLES]
212215
validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())
213216

214217
elif task == "classification_numerical_and_categorical":
215218
X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
216-
X = X.iloc[0:200]
217-
y = y.iloc[0:200]
219+
X = X.iloc[0:N_SAMPLES]
220+
y = y.iloc[0:N_SAMPLES]
218221
validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())
219222

220223
elif task == "regression_numerical_only":
221-
X, y = make_regression(n_samples=200,
224+
X, y = make_regression(n_samples=N_SAMPLES,
222225
n_features=4,
223226
n_informative=3,
224227
n_targets=1,
@@ -240,8 +243,8 @@ def get_tabular_data(task):
240243
else:
241244
X[column] = X[column].fillna(0)
242245

243-
X = X.iloc[0:200]
244-
y = y.iloc[0:200]
246+
X = X.iloc[0:N_SAMPLES]
247+
y = y.iloc[0:N_SAMPLES]
245248
y = (y - y.mean()) / y.std()
246249
validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
247250

@@ -256,8 +259,8 @@ def get_tabular_data(task):
256259
else:
257260
X[column] = X[column].fillna(0)
258261

259-
X = X.iloc[0:200]
260-
y = y.iloc[0:200]
262+
X = X.iloc[0:N_SAMPLES]
263+
y = y.iloc[0:N_SAMPLES]
261264
y = (y - y.mean()) / y.std()
262265
validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
263266
elif task == 'iris':
@@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend):
288291
'num_run': np.random.randint(50),
289292
'device': 'cpu',
290293
'budget_type': 'epochs',
291-
'epochs': 100,
294+
'epochs': 5,
292295
'torch_num_threads': 1,
293296
'early_stopping': 10,
294297
'working_dir': '/tmp',
@@ -326,7 +329,7 @@ def dataset(request):
326329
@pytest.fixture
327330
def dataset_traditional_classifier_num_only():
328331
X, y = make_classification(
329-
n_samples=200,
332+
n_samples=N_SAMPLES,
330333
n_features=4,
331334
n_informative=3,
332335
n_redundant=1,
@@ -344,15 +347,15 @@ def dataset_traditional_classifier_categorical_only():
344347
X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
345348
categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
346349
X = X[categorical_columns]
347-
X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
350+
X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
348351
return X, y
349352

350353

351354
@pytest.fixture
352355
def dataset_traditional_classifier_num_categorical():
353356
X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
354357
y = y.astype(np.int)
355-
X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
358+
X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
356359
return X, y
357360

358361

@@ -456,3 +459,8 @@ def loss_mse():
456459
@pytest.fixture
457460
def loss_details(request):
458461
return request.getfixturevalue(request.param)
462+
463+
464+
@pytest.fixture
465+
def n_samples():
466+
return N_SAMPLES

0 commit comments

Comments
 (0)