Skip to content

Commit e304c4b

Browse files
authored
Merge branch 'development' into update_warnings
2 parents 627d69f + 88ad023 commit e304c4b

File tree

10 files changed

+586
-244
lines changed

10 files changed

+586
-244
lines changed

.github/workflows/isort_checker.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
name: black-format-check
1+
name: isort-check
22

33
on: [push, pull_request, workflow_dispatch]
44

55
env:
6-
#If STRICT is set to true, it will fail on black check fail
6+
#If STRICT is set to true, it will fail on isort check fail
77
STRICT: false
88

99
jobs:
@@ -22,7 +22,7 @@ jobs:
2222
with:
2323
python-version: "3.7"
2424

25-
- name: Install black
25+
- name: Install isort
2626
run: |
2727
pip install isort
2828

autosklearn/util/pipeline.py

Lines changed: 91 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# -*- encoding: utf-8 -*-
2-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List, Optional, Union
33

44
from ConfigSpace.configuration_space import ConfigurationSpace
55

6-
from sklearn.pipeline import Pipeline
6+
import numpy as np
77

88
from autosklearn.constants import (
99
BINARY_CLASSIFICATION,
@@ -16,27 +16,69 @@
1616
from autosklearn.pipeline.regression import SimpleRegressionPipeline
1717

1818

19-
__all__ = [
20-
'get_configuration_space',
21-
'get_class',
22-
]
19+
__all__ = ['get_configuration_space']
2320

2421

25-
def get_configuration_space(info: Dict[str, Any],
26-
include: Optional[Dict[str, List[str]]] = None,
27-
exclude: Optional[Dict[str, List[str]]] = None,
28-
) -> ConfigurationSpace:
22+
def get_configuration_space(
23+
info: Dict[str, Any],
24+
include: Optional[Dict[str, List[str]]] = None,
25+
exclude: Optional[Dict[str, List[str]]] = None,
26+
random_state: Optional[Union[int, np.random.RandomState]] = None
27+
) -> ConfigurationSpace:
28+
"""Get the configuration of a pipeline given some dataset info
2929
30+
Parameters
31+
----------
32+
info: Dict[str, Any]
33+
Information about the dataset
34+
35+
include: Optional[Dict[str, List[str]]] = None
36+
A dictionary of what components to include for each pipeline step
37+
38+
exclude: Optional[Dict[str, List[str]]] = None
39+
A dictionary of what components to exclude for each pipeline step
40+
41+
random_state: Optional[Union[int, np.random.RandomState]] = None
42+
The random state to use for seeding the ConfigSpace
43+
44+
Returns
45+
-------
46+
ConfigurationSpace
47+
The configuration space for the pipeline
48+
"""
3049
if info['task'] in REGRESSION_TASKS:
31-
return _get_regression_configuration_space(info, include, exclude)
50+
return _get_regression_configuration_space(info, include, exclude, random_state)
3251
else:
33-
return _get_classification_configuration_space(info, include, exclude)
52+
return _get_classification_configuration_space(info, include, exclude, random_state)
53+
3454

55+
def _get_regression_configuration_space(
56+
info: Dict[str, Any],
57+
include: Optional[Dict[str, List[str]]],
58+
exclude: Optional[Dict[str, List[str]]],
59+
random_state: Optional[Union[int, np.random.RandomState]] = None
60+
) -> ConfigurationSpace:
61+
"""Get the configuration of a regression pipeline given some dataset info
3562
36-
def _get_regression_configuration_space(info: Dict[str, Any],
37-
include: Optional[Dict[str, List[str]]],
38-
exclude: Optional[Dict[str, List[str]]]
39-
) -> ConfigurationSpace:
63+
Parameters
64+
----------
65+
info: Dict[str, Any]
66+
Information about the dataset
67+
68+
include: Optional[Dict[str, List[str]]]
69+
A dictionary of what components to include for each pipeline step
70+
71+
exclude: Optional[Dict[str, List[str]]]
72+
A dictionary of what components to exclude for each pipeline step
73+
74+
random_state: Optional[Union[int, np.random.RandomState]] = None
75+
The random state to use for seeding the ConfigSpace
76+
77+
Returns
78+
-------
79+
ConfigurationSpace
80+
The configuration space for the regression pipeline
81+
"""
4082
task_type = info['task']
4183
sparse = False
4284
multioutput = False
@@ -54,15 +96,39 @@ def _get_regression_configuration_space(info: Dict[str, Any],
5496
configuration_space = SimpleRegressionPipeline(
5597
dataset_properties=dataset_properties,
5698
include=include,
57-
exclude=exclude
99+
exclude=exclude,
100+
random_state=random_state
58101
).get_hyperparameter_search_space()
59102
return configuration_space
60103

61104

62-
def _get_classification_configuration_space(info: Dict[str, Any],
63-
include: Optional[Dict[str, List[str]]],
64-
exclude: Optional[Dict[str, List[str]]]
65-
) -> ConfigurationSpace:
105+
def _get_classification_configuration_space(
106+
info: Dict[str, Any],
107+
include: Optional[Dict[str, List[str]]],
108+
exclude: Optional[Dict[str, List[str]]],
109+
random_state: Optional[Union[int, np.random.RandomState]] = None
110+
) -> ConfigurationSpace:
111+
"""Get the configuration of a classification pipeline given some dataset info
112+
113+
Parameters
114+
----------
115+
info: Dict[str, Any]
116+
Information about the dataset
117+
118+
include: Optional[Dict[str, List[str]]]
119+
A dictionary of what components to include for each pipeline step
120+
121+
exclude: Optional[Dict[str, List[str]]]
122+
A dictionary of what components to exclude for each pipeline step
123+
124+
random_state: Optional[Union[int, np.random.RandomState]] = None
125+
The random state to use for seeding the ConfigSpace
126+
127+
Returns
128+
-------
129+
ConfigurationSpace
130+
The configuration space for the classification pipeline
131+
"""
66132
task_type = info['task']
67133

68134
multilabel = False
@@ -87,12 +153,7 @@ def _get_classification_configuration_space(info: Dict[str, Any],
87153

88154
return SimpleClassificationPipeline(
89155
dataset_properties=dataset_properties,
90-
include=include, exclude=exclude).\
91-
get_hyperparameter_search_space()
92-
93-
94-
def get_class(info: Dict[str, Any]) -> Pipeline:
95-
if info['task'] in REGRESSION_TASKS:
96-
return SimpleRegressionPipeline
97-
else:
98-
return SimpleClassificationPipeline
156+
include=include,
157+
exclude=exclude,
158+
random_state=random_state
159+
).get_hyperparameter_search_space()

doc/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ html:
6060
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
6161

6262
html-noexamples:
63-
$(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html
63+
SPHINX_GALLERY_PLOT=False $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html
6464
@echo
6565
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
6666

@@ -167,7 +167,7 @@ changes:
167167
@echo "The overview file is in $(BUILDDIR)/changes."
168168

169169
linkcheck:
170-
$(SPHINXBUILD) -D plot_gallery=0 -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
170+
SPHINX_GALLERY_PLOT=False $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
171171
@echo
172172
@echo "Link check complete; look for any errors in the above output " \
173173
"or in $(BUILDDIR)/linkcheck/output.txt."

doc/conf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,22 @@
6868
if "dev" in autosklearn.__version__:
6969
binder_branch = "development"
7070

71+
# Getting issues with the `-D plot_gallery=0` for sphinx gallery, this is a workaround
72+
# We do this by setting an environment variable we check and modifying the python config
73+
# object.
74+
# We have this extra processing as it enters as a raw string and we need a boolean value
75+
gallery_env_var ="SPHINX_GALLERY_PLOT"
76+
77+
sphinx_plot_gallery_flag = True
78+
if gallery_env_var in os.environ:
79+
value = os.environ[gallery_env_var]
80+
if value in ["False", "false", "0"]:
81+
sphinx_plot_gallery_flag = False
82+
elif value in ["True", "true", "1"]:
83+
sphinx_plot_gallery_flag = True
84+
else:
85+
raise ValueError(f'Env variable {gallery_env_var} must be set to "false" or "true"')
86+
7187
sphinx_gallery_conf = {
7288
# path to the examples
7389
'examples_dirs': '../examples',
@@ -78,6 +94,7 @@
7894
#'reference_url': {
7995
# 'autosklearn': None
8096
#},
97+
'plot_gallery': sphinx_plot_gallery_flag,
8198
'backreferences_dir': None,
8299
'filename_pattern': 'example.*.py$',
83100
'ignore_pattern': r'custom_metrics\.py|__init__\.py',

examples/40_advanced/example_resampling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@
9898
# data by the first feature. In practice, one would use a splitting according
9999
# to the use case at hand.
100100

101+
selected_indices = (X_train[:, 0] < np.mean(X_train[:, 0])).astype(int)
101102
resampling_strategy = sklearn.model_selection.PredefinedSplit(
102-
test_fold=np.where(X_train[:, 0] < np.mean(X_train[:, 0]))[0]
103+
test_fold=selected_indices
103104
)
104105

105106
automl = autosklearn.classification.AutoSklearnClassifier(
@@ -111,6 +112,8 @@
111112
)
112113
automl.fit(X_train, y_train, dataset_name='breast_cancer')
113114

115+
print(automl.sprint_statistics())
116+
114117
############################################################################
115118
# For custom resampling strategies (i.e. resampling strategies that are not
116119
# defined as strings by Auto-sklearn) it is necessary to perform a refit:

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@
4242
"seaborn",
4343
],
4444
"docs": [
45-
"sphinx",
46-
"sphinx-gallery<=0.10.0",
45+
"sphinx<4.3",
46+
"sphinx-gallery",
4747
"sphinx_bootstrap_theme",
4848
"numpydoc",
4949
"sphinx_toolbox",
50+
"docutils==0.16"
5051
],
5152
}
5253

test/test_metalearning/pyMetaLearn/test_meta_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ def setUp(self):
1717
data_dir = os.path.join(data_dir, 'test_meta_base_data')
1818
os.chdir(data_dir)
1919

20-
cs = autosklearn.pipeline.classification.SimpleClassificationPipeline()\
21-
.get_hyperparameter_search_space()
20+
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
21+
cs = pipeline.get_hyperparameter_search_space()
2222

2323
self.logger = logging.getLogger()
2424
self.base = MetaBase(cs, data_dir, logger=self.logger)

test/test_metalearning/pyMetaLearn/test_metalearner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def setUp(self):
2323
data_dir = os.path.join(data_dir, 'test_meta_base_data')
2424
os.chdir(data_dir)
2525

26-
self.cs = autosklearn.pipeline.classification\
27-
.SimpleClassificationPipeline().get_hyperparameter_search_space()
26+
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
27+
self.cs = pipeline.get_hyperparameter_search_space()
2828

2929
self.logger = logging.getLogger()
3030
meta_base = MetaBase(self.cs, data_dir, logger=self.logger)

test/test_pipeline/components/data_preprocessing/test_balancing.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@ def test_weighting_effect(self):
108108
default = cs.get_default_configuration()
109109
default._values['balancing:strategy'] = strategy
110110

111-
classifier = SimpleClassificationPipeline(
112-
config=default, **model_args
113-
)
111+
classifier = SimpleClassificationPipeline(config=default, **model_args)
114112
classifier.fit(X_train, Y_train)
115113

116114
predictions1 = classifier.predict(X_test)
@@ -126,9 +124,7 @@ def test_weighting_effect(self):
126124
X_test = data_[0][100:]
127125
Y_test = data_[1][100:]
128126

129-
classifier = SimpleClassificationPipeline(
130-
config=default, **model_args
131-
)
127+
classifier = SimpleClassificationPipeline(config=default, **model_args)
132128
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
133129
classifier.fit_estimator(Xt, Y_train, **fit_params)
134130

@@ -157,8 +153,7 @@ def test_weighting_effect(self):
157153

158154
include = {'classifier': ['sgd'], 'feature_preprocessor': [name]}
159155

160-
classifier = SimpleClassificationPipeline(
161-
random_state=1, include=include)
156+
classifier = SimpleClassificationPipeline(random_state=1, include=include)
162157
cs = classifier.get_hyperparameter_search_space()
163158
default = cs.get_default_configuration()
164159
default._values['balancing:strategy'] = strategy
@@ -177,8 +172,7 @@ def test_weighting_effect(self):
177172
Y_test = data_[1][100:]
178173

179174
default._values['balancing:strategy'] = strategy
180-
classifier = SimpleClassificationPipeline(
181-
default, random_state=1, include=include)
175+
classifier = SimpleClassificationPipeline(default, random_state=1, include=include)
182176
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
183177
classifier.fit_estimator(Xt, Y_train, **fit_params)
184178
predictions = classifier.predict(X_test)

0 commit comments

Comments
 (0)