Skip to content

Commit e30a075

Browse files
author
Github Actions
committed
ArlindKadra: Bug fix
1 parent 886460a commit e30a075

File tree

131 files changed

+71565
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+71565
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Sphinx build info version 1
2+
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3+
config: d4f3d04cddab6e3e314b10c7fdfafae2
4+
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""
2+
======================
3+
Tabular Classification
4+
======================
5+
6+
The following example shows how to fit a sample classification model
7+
with AutoPyTorch
8+
"""
9+
import os
10+
import tempfile as tmp
11+
import warnings
12+
13+
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
14+
os.environ['OMP_NUM_THREADS'] = '1'
15+
os.environ['OPENBLAS_NUM_THREADS'] = '1'
16+
os.environ['MKL_NUM_THREADS'] = '1'
17+
18+
warnings.simplefilter(action='ignore', category=UserWarning)
19+
warnings.simplefilter(action='ignore', category=FutureWarning)
20+
21+
import sklearn.datasets
22+
import sklearn.model_selection
23+
24+
from autoPyTorch.api.tabular_classification import TabularClassificationTask
25+
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
26+
27+
28+
def get_search_space_updates():
    """
    Build the custom hyperparameter ranges used by this example.

    Three updates are registered, in this order: ``batch_size`` on the
    ``data_loader`` node, ``CosineAnnealingLR:T_max`` on the ``lr_scheduler``
    node and ``ResNetBackbone:dropout`` on the ``network_backbone`` node.

    Returns:
        HyperparameterSearchSpaceUpdates
    """
    search_space = HyperparameterSearchSpaceUpdates()
    # Each row is (node_name, hyperparameter, value_range, default_value).
    overrides = (
        ("data_loader", "batch_size", [16, 512], 32),
        ("lr_scheduler", "CosineAnnealingLR:T_max", [50, 60], 55),
        ("network_backbone", "ResNetBackbone:dropout", [0, 0.5], 0.2),
    )
    for node_name, hyperparameter, value_range, default_value in overrides:
        search_space.append(node_name=node_name,
                            hyperparameter=hyperparameter,
                            value_range=value_range,
                            default_value=default_value)
    return search_space
48+
49+
50+
if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============
    features, targets = sklearn.datasets.fetch_openml(
        data_id=40981,
        return_X_y=True,
        as_frame=True,
    )
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        features,
        targets,
        random_state=1,
    )

    ############################################################################
    # Build and fit a classifier
    # ==========================
    api = TabularClassificationTask(
        search_space_updates=get_search_space_updates(),
        delete_tmp_folder_after_terminate=False,
    )
    # Copies of the held-out split are handed to the search, so the objects
    # used for the final prediction below are separate from the ones it receives.
    search_arguments = dict(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        total_walltime_limit=500,
        func_eval_time_limit=50,
    )
    api.search(**search_arguments)

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred = api.predict(X_test)
    print(api.score(y_pred, y_test))
    print(api.show_models())
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
"""
2+
======================
3+
Tabular Regression
4+
======================
5+
6+
The following example shows how to fit a sample regression model
7+
with AutoPyTorch
8+
"""
9+
import os
10+
import tempfile as tmp
11+
import typing
12+
import warnings
13+
14+
from sklearn.datasets import make_regression
15+
16+
from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator
17+
18+
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
19+
os.environ['OMP_NUM_THREADS'] = '1'
20+
os.environ['OPENBLAS_NUM_THREADS'] = '1'
21+
os.environ['MKL_NUM_THREADS'] = '1'
22+
23+
warnings.simplefilter(action='ignore', category=UserWarning)
24+
warnings.simplefilter(action='ignore', category=FutureWarning)
25+
26+
from sklearn import model_selection, preprocessing
27+
28+
from autoPyTorch.api.tabular_regression import TabularRegressionTask
29+
from autoPyTorch.datasets.tabular_dataset import TabularDataset
30+
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
31+
32+
33+
def get_search_space_updates():
    """
    Assemble the search space overrides passed to the regression task.

    Every entry below is forwarded, in order, as keyword arguments to
    ``HyperparameterSearchSpaceUpdates.append``.

    Returns:
        HyperparameterSearchSpaceUpdates
    """
    overrides = [
        dict(node_name="data_loader",
             hyperparameter="batch_size",
             value_range=[16, 512],
             default_value=32),
        dict(node_name="lr_scheduler",
             hyperparameter="CosineAnnealingLR:T_max",
             value_range=[50, 60],
             default_value=55),
        dict(node_name='network_backbone',
             hyperparameter='ResNetBackbone:dropout',
             value_range=[0, 0.5],
             default_value=0.2),
    ]
    updates = HyperparameterSearchSpaceUpdates()
    for override in overrides:
        updates.append(**override)
    return updates
53+
54+
55+
if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============

    # Intended training data for tabular regression:
    #   X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True)
    # Dummy data is used for now since there are problems with categorical
    # columns.
    features, targets = make_regression(
        n_samples=5000,
        n_features=4,
        n_informative=3,
        n_targets=1,
        shuffle=True,
        random_state=0,
    )

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        features,
        targets,
        random_state=1,
    )

    # Standardise the regression targets with the statistics of the training
    # split only. Neural networks would otherwise need very large weights to
    # predict large target values. Predictions are mapped back afterwards
    # with: y_pred = y_pred_scaled * target_std + target_mean
    target_mean, target_std = y_train.mean(), y_train.std()
    y_train_scaled = (y_train - target_mean) / target_std
    y_test_scaled = (y_test - target_mean) / target_std

    ############################################################################
    # Build and fit a regressor
    # ==========================
    api = TabularRegressionTask(
        search_space_updates=get_search_space_updates(),
        delete_tmp_folder_after_terminate=False,
    )
    search_arguments = dict(
        X_train=X_train,
        y_train=y_train_scaled,
        X_test=X_test.copy(),
        y_test=y_test_scaled.copy(),
        optimize_metric='r2',
        total_walltime_limit=500,
        func_eval_time_limit=50,
        traditional_per_total_budget=0,
    )
    api.search(**search_arguments)

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred_scaled = api.predict(X_test)

    # Undo the target scaling so the score is computed against the original,
    # unscaled test targets.
    y_pred = y_pred_scaled * target_std + target_mean
    print(api.score(y_pred, y_test))
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Tabular Regression\n\nThe following example shows how to fit a sample regression model\nwith AutoPyTorch\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nfrom sklearn.datasets import make_regression\n\nfrom autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom sklearn import model_selection, preprocessing\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n\n # Get the training data for tabular regression\n # X, y = datasets.fetch_openml(name=\"cholesterol\", return_X_y=True)\n\n # Use dummy data for now since there are problems with categorical columns\n X, y = make_regression(\n n_samples=5000,\n n_features=4,\n n_informative=3,\n n_targets=1,\n shuffle=True,\n random_state=0\n )\n\n X_train, X_test, y_train, y_test = model_selection.train_test_split(\n X,\n y,\n 
random_state=1,\n )\n\n # Scale the regression targets to have zero mean and unit variance.\n # This is important for Neural Networks since predicting large target values would require very large weights.\n # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n y_train_mean = y_train.mean()\n y_train_std = y_train.std()\n\n y_train_scaled = (y_train - y_train_mean) / y_train_std\n y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n ############################################################################\n # Build and fit a regressor\n # ==========================\n api = TabularRegressionTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n X_train=X_train,\n y_train=y_train_scaled,\n X_test=X_test.copy(),\n y_test=y_test_scaled.copy(),\n optimize_metric='r2',\n total_walltime_limit=500,\n func_eval_time_limit=50,\n traditional_per_total_budget=0\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred_scaled = api.predict(X_test)\n\n # Rescale the Neural Network predictions into the original target range\n y_pred = y_pred_scaled * y_train_std + y_train_mean\n score = api.score(y_pred, y_test)\n\n print(score)"
30+
]
31+
}
32+
],
33+
"metadata": {
34+
"kernelspec": {
35+
"display_name": "Python 3",
36+
"language": "python",
37+
"name": "python3"
38+
},
39+
"language_info": {
40+
"codemirror_mode": {
41+
"name": "ipython",
42+
"version": 3
43+
},
44+
"file_extension": ".py",
45+
"mimetype": "text/x-python",
46+
"name": "python",
47+
"nbconvert_exporter": "python",
48+
"pygments_lexer": "ipython3",
49+
"version": "3.8.8"
50+
}
51+
},
52+
"nbformat": 4,
53+
"nbformat_minor": 0
54+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
======================
3+
Image Classification
4+
======================
5+
"""
6+
import numpy as np
7+
8+
import sklearn.model_selection
9+
10+
import torchvision.datasets
11+
12+
from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline
13+
14+
# Get the training data for image classification
trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)
data = trainset.data.numpy()
# Append a trailing axis so the array can be indexed as
# (sample, height, width, channel) below.
data = np.expand_dims(data, axis=3)
num_samples, image_height, image_width, num_channels = data.shape

# Create a proof of concept pipeline!
dataset_properties = dict()
pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)

# Train and validation split, expressed as indices into ``data``
train_indices, val_indices = sklearn.model_selection.train_test_split(
    list(range(num_samples)),
    random_state=1,
    test_size=0.25,
)

# Configuration space
pipeline_cs = pipeline.get_hyperparameter_search_space()
print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}")
config = pipeline_cs.sample_configuration()
print("Pipeline Random Config:\n", '_' * 40, f"\n{config}")
pipeline.set_hyperparameters(config)

# Fit the pipeline
print("Fitting the pipeline...")

# One mean/std value per channel; the channel count is read from the data
# instead of being hard-coded, so the statistics stay correct for multi-channel
# images as well.
channel_mean = np.array([np.mean(data[:, :, :, channel]) for channel in range(num_channels)])
channel_std = np.array([np.std(data[:, :, :, channel]) for channel in range(num_channels)])

pipeline.fit(X=dict(X_train=data,
                    is_small_preprocess=True,
                    dataset_properties=dict(mean=channel_mean,
                                            std=channel_std,
                                            num_classes=10,
                                            num_features=image_height * image_width,
                                            image_height=image_height,
                                            image_width=image_width,
                                            is_small_preprocess=True),
                    train_indices=train_indices,
                    val_indices=val_indices,
                    )
             )

# Showcase some components of the pipeline
print(pipeline)
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Image Classification\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import numpy as np\n\nimport sklearn.model_selection\n\nimport torchvision.datasets\n\nfrom autoPyTorch.pipeline.image_classification import ImageClassificationPipeline\n\n# Get the training data for tabular classification\ntrainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)\ndata = trainset.data.numpy()\ndata = np.expand_dims(data, axis=3)\n# Create a proof of concept pipeline!\ndataset_properties = dict()\npipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)\n\n# Train and test split\ntrain_indices, val_indices = sklearn.model_selection.train_test_split(\n list(range(data.shape[0])),\n random_state=1,\n test_size=0.25,\n)\n\n# Configuration space\npipeline_cs = pipeline.get_hyperparameter_search_space()\nprint(\"Pipeline CS:\\n\", '_' * 40, f\"\\n{pipeline_cs}\")\nconfig = pipeline_cs.sample_configuration()\nprint(\"Pipeline Random Config:\\n\", '_' * 40, f\"\\n{config}\")\npipeline.set_hyperparameters(config)\n\n# Fit the pipeline\nprint(\"Fitting the pipeline...\")\n\npipeline.fit(X=dict(X_train=data,\n is_small_preprocess=True,\n dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]),\n std=np.array([np.std(data[:, :, :, i]) for i in range(1)]),\n num_classes=10,\n num_features=data.shape[1] * data.shape[2],\n image_height=data.shape[1],\n image_width=data.shape[2],\n is_small_preprocess=True),\n train_indices=train_indices,\n val_indices=val_indices,\n )\n )\n\n# Showcase some components of the pipeline\nprint(pipeline)"
30+
]
31+
}
32+
],
33+
"metadata": {
34+
"kernelspec": {
35+
"display_name": "Python 3",
36+
"language": "python",
37+
"name": "python3"
38+
},
39+
"language_info": {
40+
"codemirror_mode": {
41+
"name": "ipython",
42+
"version": 3
43+
},
44+
"file_extension": ".py",
45+
"mimetype": "text/x-python",
46+
"name": "python",
47+
"nbconvert_exporter": "python",
48+
"pygments_lexer": "ipython3",
49+
"version": "3.8.8"
50+
}
51+
},
52+
"nbformat": 4,
53+
"nbformat_minor": 0
54+
}

0 commit comments

Comments
 (0)