1414
1515import sklearn
1616import sklearn .datasets
17+ from sklearn .base import BaseEstimator
1718from sklearn .base import clone
1819from sklearn .ensemble import VotingClassifier , VotingRegressor
1920
2627 HoldoutValTypes ,
2728)
2829from autoPyTorch .optimizer .smbo import AutoMLSMBO
30+ from autoPyTorch .pipeline .components .setup .traditional_ml .classifier_models import _classifiers
2931from autoPyTorch .pipeline .components .training .metrics .metrics import accuracy
3032
3133
@@ -183,9 +185,12 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl
183185 assert len (estimator .ensemble_ .identifiers_ ) == len (estimator .ensemble_ .weights_ )
184186
185187 y_pred = estimator .predict (X_test )
186-
187188 assert np .shape (y_pred )[0 ] == np .shape (X_test )[0 ]
188189
190+ # Make sure that predict proba has the expected shape
191+ probabilites = estimator .predict_proba (X_test )
192+ assert np .shape (probabilites ) == (np .shape (X_test )[0 ], 2 )
193+
189194 score = estimator .score (y_pred , y_test )
190195 assert 'accuracy' in score
191196
@@ -203,6 +208,9 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl
203208 restored_estimator = pickle .load (f )
204209 restored_estimator .predict (X_test )
205210
211+ # Test refit on dummy data
212+ estimator .refit (dataset = backend .load_datamanager ())
213+
206214
207215@pytest .mark .parametrize ('openml_name' , ("boston" , ))
208216@unittest .mock .patch ('autoPyTorch.evaluation.train_evaluator.eval_function' ,
@@ -439,6 +447,12 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular):
439447 estimator ._disable_file_output = []
440448 estimator ._all_supported_metrics = False
441449
450+ original_memory_limit = estimator ._memory_limit
451+ estimator ._memory_limit = 500
452+ with pytest .raises (ValueError , match = r".*Dummy prediction failed with run state.*" ):
453+ estimator ._do_dummy_prediction ()
454+
455+ estimator ._memory_limit = original_memory_limit
442456 estimator ._do_dummy_prediction ()
443457
444458 # Ensure that the dummy predictions are not in the current working
@@ -464,3 +478,78 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular):
464478 estimator ._clean_logger ()
465479
466480 del estimator
481+
482+
# TODO: Make faster when https://github.com/automl/Auto-PyTorch/pull/223 is incorporated
@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only'], indirect=True)
def test_do_traditional_pipeline(fit_dictionary_tabular):
    """Fit the traditional (non-NN) classifiers and validate their artifacts.

    Runs ``_do_traditional_prediction`` directly (bypassing ``search()``) and
    checks that each successful run left ensemble predictions plus a pickled,
    scikit-learn-compatible model in the backend's run directory, and that
    nothing was written to the current working directory.
    """
    backend = fit_dictionary_tabular['backend']
    estimator = TabularClassificationTask(
        backend=backend,
        resampling_strategy=HoldoutValTypes.holdout_validation,
        ensemble_size=0,
    )

    # Setup pre-requisites normally set by search()
    estimator._create_dask_client()
    estimator._metric = accuracy
    estimator._logger = estimator._get_logger('test')
    estimator._memory_limit = 5000
    estimator._time_for_task = 60
    estimator._disable_file_output = []
    estimator._all_supported_metrics = False

    estimator._do_traditional_prediction(time_left=60, func_eval_time_limit_secs=30)

    # The models should not be on the current directory
    assert not os.path.exists(os.path.join(os.getcwd(), '.autoPyTorch'))

    # Then we should have fitted 5 classifiers.
    # Maybe some of them fail (unlikely, but we do not control external API)
    # but we want to make this test robust: tolerate individual missing runs
    # and only fail if NO traditional pipeline produced artifacts.
    at_least_one_model_checked = False
    for i in range(2, 7):
        pred_path = os.path.join(
            backend.temporary_directory, '.autoPyTorch', 'runs', f"1_{i}_50.0",
            f"predictions_ensemble_1_{i}_50.0.npy"
        )
        if not os.path.exists(pred_path):
            # This particular traditional run failed; skip it rather than
            # failing the whole test (see robustness note above).
            continue

        model_path = os.path.join(backend.temporary_directory,
                                  '.autoPyTorch',
                                  'runs', f"1_{i}_50.0",
                                  f"1.{i}.50.0.model")

        # Make sure the dummy model complies with scikit learn
        # get/set params
        assert os.path.exists(model_path)
        with open(model_path, 'rb') as model_handler:
            model = pickle.load(model_handler)
        clone(model)
        # Run numbering starts at num_run=2, so index i-2 into the
        # registered traditional classifiers (insertion-ordered dict).
        assert model.config == list(_classifiers.keys())[i - 2]
        at_least_one_model_checked = True
    if not at_least_one_model_checked:
        pytest.fail("Not even one single traditional pipeline was fitted")

    estimator._close_dask_client()
    estimator._clean_logger()

    del estimator
538+
539+
@pytest.mark.parametrize("api_type", [TabularClassificationTask, TabularRegressionTask])
def test_unsupported_msg(api_type):
    """A freshly constructed API object raises informative errors before search()."""
    api = api_type()

    # An incompatible dataset object must be rejected with a clear message.
    incompatible_msg = r".*Dataset is incompatible for the given task.*"
    with pytest.raises(ValueError, match=incompatible_msg):
        api._get_required_dataset_properties('dummy')

    # Calling predict() without having run search() must point the user at search().
    not_fitted_msg = r".*is only supported after calling search. Kindly .*"
    with pytest.raises(ValueError, match=not_fitted_msg):
        api.predict(np.ones((10, 10)))
547+
548+
@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only'], indirect=True)
@pytest.mark.parametrize("api_type", [TabularClassificationTask, TabularRegressionTask])
def test_build_pipeline(api_type, fit_dictionary_tabular):
    """build_pipeline() yields a non-empty, scikit-learn-compatible pipeline."""
    dataset_properties = fit_dictionary_tabular['dataset_properties']
    pipeline = api_type().build_pipeline(dataset_properties)
    # Must be a BaseEstimator (clone/get_params compatible) with at least one step.
    assert isinstance(pipeline, BaseEstimator)
    assert len(pipeline.steps) > 0
0 commit comments