Skip to content

Commit a931763

Browse files
committed
Address comments from shuhei
1 parent b59f1c2 commit a931763

File tree

7 files changed

+88
-91
lines changed

7 files changed

+88
-91
lines changed

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@
1616

1717
from smac.tae import StatusType
1818

19-
import autoPyTorch.pipeline.image_classification
20-
import autoPyTorch.pipeline.tabular_classification
21-
import autoPyTorch.pipeline.tabular_regression
22-
import autoPyTorch.pipeline.traditional_tabular_classification
23-
import autoPyTorch.pipeline.traditional_tabular_regression
2419
from autoPyTorch.automl_common.common.utils.backend import Backend
2520
from autoPyTorch.constants import (
2621
CLASSIFICATION_TASKS,
@@ -42,6 +37,11 @@
4237
calculate_loss,
4338
get_metrics,
4439
)
40+
from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline
41+
from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
42+
from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline
43+
from autoPyTorch.pipeline.traditional_tabular_classification import TraditionalTabularClassificationPipeline
44+
from autoPyTorch.pipeline.traditional_tabular_regression import TraditionalTabularRegressionPipeline
4545
from autoPyTorch.utils.common import subsampler
4646
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
4747
from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
@@ -65,7 +65,7 @@ class MyTraditionalTabularClassificationPipeline(BaseEstimator):
6565
Attributes:
6666
dataset_properties (Dict[str, Any]):
6767
A dictionary containing dataset specific information
68-
random_state (Optional[Union[int, np.random.RandomState]]):
68+
random_state (Optional[np.random.RandomState]):
6969
Object that contains a seed and allows for reproducible results
7070
init_params (Optional[Dict]):
7171
An optional dictionary that is passed to the pipeline's steps. It complies
@@ -74,15 +74,14 @@ class MyTraditionalTabularClassificationPipeline(BaseEstimator):
7474

7575
def __init__(self, config: str,
7676
dataset_properties: Dict[str, Any],
77-
random_state: Optional[Union[int, np.random.RandomState]] = None,
77+
random_state: Optional[np.random.RandomState] = None,
7878
init_params: Optional[Dict] = None):
7979
self.config = config
8080
self.dataset_properties = dataset_properties
8181
self.random_state = random_state
8282
self.init_params = init_params
83-
self.pipeline = autoPyTorch.pipeline.traditional_tabular_classification.\
84-
TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties,
85-
random_state=self.random_state)
83+
self.pipeline = TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties,
84+
random_state=self.random_state)
8685
configuration_space = self.pipeline.get_hyperparameter_search_space()
8786
default_configuration = configuration_space.get_default_configuration().get_dictionary()
8887
default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config
@@ -120,8 +119,7 @@ def get_pipeline_representation(self) -> Dict[str, str]:
120119

121120
@staticmethod
122121
def get_default_pipeline_options() -> Dict[str, Any]:
123-
return autoPyTorch.pipeline.traditional_tabular_classification. \
124-
TraditionalTabularClassificationPipeline.get_default_pipeline_options()
122+
return TraditionalTabularClassificationPipeline.get_default_pipeline_options()
125123

126124

127125
class MyTraditionalTabularRegressionPipeline(BaseEstimator):
@@ -136,23 +134,22 @@ class MyTraditionalTabularRegressionPipeline(BaseEstimator):
136134
Attributes:
137135
dataset_properties (Dict[str, Any]):
138136
A dictionary containing dataset specific information
139-
random_state (Optional[Union[int, np.random.RandomState]]):
137+
random_state (Optional[np.random.RandomState]):
140138
Object that contains a seed and allows for reproducible results
141139
init_params (Optional[Dict]):
142140
An optional dictionary that is passed to the pipeline's steps. It complies
143141
a similar function as the kwargs
144142
"""
145143
def __init__(self, config: str,
146144
dataset_properties: Dict[str, Any],
147-
random_state: Optional[Union[int, np.random.RandomState]] = None,
145+
random_state: Optional[np.random.RandomState] = None,
148146
init_params: Optional[Dict] = None):
149147
self.config = config
150148
self.dataset_properties = dataset_properties
151149
self.random_state = random_state
152150
self.init_params = init_params
153-
self.pipeline = autoPyTorch.pipeline.traditional_tabular_regression.\
154-
TraditionalTabularRegressionPipeline(dataset_properties=dataset_properties,
155-
random_state=self.random_state)
151+
self.pipeline = TraditionalTabularRegressionPipeline(dataset_properties=dataset_properties,
152+
random_state=self.random_state)
156153
configuration_space = self.pipeline.get_hyperparameter_search_space()
157154
default_configuration = configuration_space.get_default_configuration().get_dictionary()
158155
default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config
@@ -185,8 +182,7 @@ def get_pipeline_representation(self) -> Dict[str, str]:
185182

186183
@staticmethod
187184
def get_default_pipeline_options() -> Dict[str, Any]:
188-
return autoPyTorch.pipeline.traditional_tabular_regression. \
189-
TraditionalTabularRegressionPipeline.get_default_pipeline_options()
185+
return TraditionalTabularRegressionPipeline.get_default_pipeline_options()
190186

191187

192188
class DummyClassificationPipeline(DummyClassifier):
@@ -460,7 +456,7 @@ def __init__(self, backend: Backend,
460456
elif isinstance(self.configuration, str):
461457
self.pipeline_class = MyTraditionalTabularRegressionPipeline
462458
elif isinstance(self.configuration, Configuration):
463-
self.pipeline_class = autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline
459+
self.pipeline_class = TabularRegressionPipeline
464460
else:
465461
raise ValueError('task {} not available'.format(self.task_type))
466462
self.predict_function = self._predict_regression
@@ -474,9 +470,9 @@ def __init__(self, backend: Backend,
474470
raise ValueError("Only tabular tasks are currently supported with traditional methods")
475471
elif isinstance(self.configuration, Configuration):
476472
if self.task_type in TABULAR_TASKS:
477-
self.pipeline_class = autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline
473+
self.pipeline_class = TabularClassificationPipeline
478474
elif self.task_type in IMAGE_TASKS:
479-
self.pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline
475+
self.pipeline_class = ImageClassificationPipeline
480476
else:
481477
raise ValueError('task {} not available'.format(self.task_type))
482478
self.predict_function = self._predict_proba

autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/learners.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.base_traditional_learner import \
2121
BaseTraditionalLearner
2222
from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.utils import (
23-
AUTOPYTORCH_TO_CATBOOST_METRICS
23+
AutoPyTorchToCatboostMetrics
2424
)
2525

2626

@@ -112,11 +112,11 @@ def _prepare_model(self,
112112
y_train: np.ndarray
113113
) -> None:
114114
if not self.is_classification:
115-
self.config['eval_metric'] = AUTOPYTORCH_TO_CATBOOST_METRICS.get(self.metric.name, 'R2')
115+
self.config['eval_metric'] = AutoPyTorchToCatboostMetrics[self.metric.name].value
116116
# CatBoost Cannot handle a random state object, just the seed
117117
self.model = CatBoostRegressor(**self.config, random_state=self.random_state.get_state()[1][0])
118118
else:
119-
self.config['eval_metric'] = AUTOPYTORCH_TO_CATBOOST_METRICS.get(self.metric.name, 'Accuracy')
119+
self.config['eval_metric'] = AutoPyTorchToCatboostMetrics[self.metric.name].value
120120
# CatBoost Cannot handle a random state object, just the seed
121121
self.model = CatBoostClassifier(**self.config, random_state=self.random_state.get_state()[1][0])
122122

autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/utils.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
AUTOPYTORCH_TO_CATBOOST_METRICS = {
2-
"mean_absolute_error": "MAE",
3-
"root_mean_squared_error": "RMSE",
4-
"mean_squared_log_error": "MSLE",
5-
"r2": "R2",
6-
"accuracy": "Accuracy",
7-
"balanced_accuracy": "BalancedAccuracy",
8-
"f1": "F1",
9-
"roc_auc": "AUC",
10-
"precision": "Precision",
11-
"recall": "Recall",
12-
"log_loss": "Logloss"
13-
}
1+
from enum import Enum
2+
3+
4+
class AutoPyTorchToCatboostMetrics(Enum):
5+
mean_absolute_error = "MAE"
6+
root_mean_squared_error = "RMSE"
7+
mean_squared_log_error = "MSLE"
8+
r2 = "R2"
9+
accuracy = "Accuracy"
10+
balanced_accuracy = "BalancedAccuracy"
11+
f1 = "F1"
12+
roc_auc = "AUC"
13+
precision = "Precision"
14+
recall = "Recall"
15+
log_loss = "Logloss"

autoPyTorch/pipeline/components/training/trainer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
250250
# Support additional user metrics
251251
additional_metrics = X['additional_metrics'] if 'additional_metrics' in X else None
252252
if 'optimize_metric' in X:
253-
additional_metrics = additional_metrics.append(X['optimize_metric']) if additional_metrics is not None\
253+
additional_metrics = additional_metrics.append(X['optimize_metric']) if additional_metrics is not None \
254254
else [X['optimize_metric']]
255255
additional_losses = X['additional_losses'] if 'additional_losses' in X else None
256256
self.choice.prepare(

autoPyTorch/pipeline/traditional_tabular_regression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from autoPyTorch.pipeline.base_pipeline import BasePipeline
1111
from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
12-
from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice
12+
from autoPyTorch.pipeline.components.setup.traditional_ml import ModelChoice
1313

1414

1515
class TraditionalTabularRegressionPipeline(RegressorMixin, BasePipeline):

test/test_api/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
HoldoutValTypes,
3333
)
3434
from autoPyTorch.datasets.tabular_dataset import TabularDataset
35-
from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners
3635
from autoPyTorch.optimizer.smbo import AutoMLSMBO
36+
from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners
3737
from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy
3838

3939

test/test_pipeline/components/setup/test_setup_traditional_models.py

Lines changed: 49 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import pytest
99

10-
from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice
10+
from autoPyTorch.pipeline.components.setup.traditional_ml import ModelChoice
1111
from autoPyTorch.pipeline.components.setup.traditional_ml.tabular_traditional_model import TabularTraditionalModel
1212

1313

@@ -83,52 +83,51 @@ def test_get_set_config_space(self, dataset_properties):
8383
"regression_categorical_only",
8484
"regression_numerical_and_categorical"
8585
], indirect=True)
86-
class TestTraditionalModels:
87-
def test_model_fit_predict_score(self, traditional_learner, fit_dictionary_tabular):
88-
89-
if len(fit_dictionary_tabular['dataset_properties']['numerical_columns']) == 0 and traditional_learner == 'knn':
90-
pytest.skip("knn can not work with categorical only data")
91-
92-
model = TabularTraditionalModel(traditional_learner=traditional_learner)
93-
94-
blockPrint()
95-
model.fit(X=fit_dictionary_tabular)
96-
enablePrint()
97-
98-
assert isinstance(model.fit_output, dict)
99-
assert 'val_preds' in model.fit_output.keys()
100-
assert isinstance(model.fit_output['val_preds'], list)
101-
assert len(model.fit_output['val_preds']) == len(fit_dictionary_tabular['val_indices'])
102-
if model.model.is_classification:
103-
assert len(model.fit_output['val_preds'][0]) == len(np.unique(fit_dictionary_tabular['y_train']))
104-
assert len(np.argwhere(0 > np.array(model.fit_output['val_preds']).all() > 1)) == 0
105-
assert 'labels' in model.fit_output.keys()
106-
assert len(model.fit_output['labels']) == len(fit_dictionary_tabular['val_indices'])
107-
assert 'train_score' in model.fit_output.keys()
108-
assert isinstance(model.fit_output['train_score'], float)
109-
assert 'val_score' in model.fit_output.keys()
110-
assert isinstance(model.fit_output['val_score'], float)
111-
112-
# Test if traditional model can predict on val set
113-
if model.model.is_classification:
114-
y_pred = model.predict_proba(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']])
115-
else:
116-
y_pred = model.predict(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']])
117-
118-
assert np.allclose(y_pred.squeeze(), model.fit_output['val_preds'], atol=1e-04)
119-
assert y_pred.shape[0] == len(fit_dictionary_tabular['val_indices'])
120-
# Test if classifier can score and
121-
# the result is same as in results
122-
score = model.score(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']],
123-
fit_dictionary_tabular['y_train'][fit_dictionary_tabular['val_indices']])
124-
assert np.allclose(score, model.fit_output['val_score'], atol=1e-6)
125-
126-
if sys.version_info >= (3, 7):
127-
dump_file = os.path.join(fit_dictionary_tabular['backend'].temporary_directory, 'dump.pkl')
128-
129-
with open(dump_file, 'wb') as f:
130-
pickle.dump(model, f)
131-
132-
with open(dump_file, 'rb') as f:
133-
restored_estimator = pickle.load(f)
134-
restored_estimator.predict(fit_dictionary_tabular['X_train'])
86+
def test_model_fit_predict_score(traditional_learner, fit_dictionary_tabular):
87+
88+
if len(fit_dictionary_tabular['dataset_properties']['numerical_columns']) == 0 and traditional_learner == 'knn':
89+
pytest.skip("knn can not work with categorical only data")
90+
91+
model = TabularTraditionalModel(traditional_learner=traditional_learner)
92+
93+
blockPrint()
94+
model.fit(X=fit_dictionary_tabular)
95+
enablePrint()
96+
97+
assert isinstance(model.fit_output, dict)
98+
assert 'val_preds' in model.fit_output.keys()
99+
assert isinstance(model.fit_output['val_preds'], list)
100+
assert len(model.fit_output['val_preds']) == len(fit_dictionary_tabular['val_indices'])
101+
if model.model.is_classification:
102+
assert len(model.fit_output['val_preds'][0]) == len(np.unique(fit_dictionary_tabular['y_train']))
103+
assert len(np.argwhere(0 > np.array(model.fit_output['val_preds']).all() > 1)) == 0
104+
assert 'labels' in model.fit_output.keys()
105+
assert len(model.fit_output['labels']) == len(fit_dictionary_tabular['val_indices'])
106+
assert 'train_score' in model.fit_output.keys()
107+
assert isinstance(model.fit_output['train_score'], float)
108+
assert 'val_score' in model.fit_output.keys()
109+
assert isinstance(model.fit_output['val_score'], float)
110+
111+
# Test if traditional model can predict on val set
112+
if model.model.is_classification:
113+
y_pred = model.predict_proba(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']])
114+
else:
115+
y_pred = model.predict(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']])
116+
117+
assert np.allclose(y_pred.squeeze(), model.fit_output['val_preds'], atol=1e-04)
118+
assert y_pred.shape[0] == len(fit_dictionary_tabular['val_indices'])
119+
# Test if classifier can score and
120+
# the result is same as in results
121+
score = model.score(fit_dictionary_tabular['X_train'][fit_dictionary_tabular['val_indices']],
122+
fit_dictionary_tabular['y_train'][fit_dictionary_tabular['val_indices']])
123+
assert np.allclose(score, model.fit_output['val_score'], atol=1e-6)
124+
125+
if sys.version_info >= (3, 7):
126+
dump_file = os.path.join(fit_dictionary_tabular['backend'].temporary_directory, 'dump.pkl')
127+
128+
with open(dump_file, 'wb') as f:
129+
pickle.dump(model, f)
130+
131+
with open(dump_file, 'rb') as f:
132+
restored_estimator = pickle.load(f)
133+
restored_estimator.predict(fit_dictionary_tabular['X_train'])

0 commit comments

Comments
 (0)