automl
diff --git a/‎refactor_development_cocktail_update/.buildinfo‎
Lines changed: 4 additions & 0 deletions b/‎refactor_development_cocktail_update/.buildinfo‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/08a8d852a0652c3f1a4a72cc129ab783/example_tabular_classification.py‎
Lines changed: 85 additions & 0 deletions b/‎refactor_development_cocktail_update/_downloads/08a8d852a0652c3f1a4a72cc129ab783/example_tabular_classification.py‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/0baaec1666f007b22da0886cb1b9e240/example_tabular_regression.py‎
Lines changed: 116 additions & 0 deletions b/‎refactor_development_cocktail_update/_downloads/0baaec1666f007b22da0886cb1b9e240/example_tabular_regression.py‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb‎
Lines changed: 54 additions & 0 deletions b/‎refactor_development_cocktail_update/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/3a985c2d5cf88bfc51ae65d16b30f86c/example_image_classification.py‎
Lines changed: 54 additions & 0 deletions b/‎refactor_development_cocktail_update/_downloads/3a985c2d5cf88bfc51ae65d16b30f86c/example_image_classification.py‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/a39c0378d911b81ecec47ff0a116e6bf/example_image_classification.ipynb‎
Lines changed: 54 additions & 0 deletions b/‎refactor_development_cocktail_update/_downloads/a39c0378d911b81ecec47ff0a116e6bf/example_image_classification.ipynb‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎refactor_development_cocktail_update/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip‎
8.8 KB b/‎refactor_development_cocktail_update/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip‎
8.8 KB
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: d4f3d04cddab6e3e314b10c7fdfafae2
+tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -0,0 +1,85 @@
+"""
+======================
+Tabular Classification
+======================
+
+The following example shows how to fit a sample classification model
+with AutoPyTorch
+"""
+import os
+import tempfile as tmp
+import warnings
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.model_selection
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+
+
+def get_search_space_updates():
+    """
+    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
+    Returns:
+        HyperparameterSearchSpaceUpdates
+    """
+    updates = HyperparameterSearchSpaceUpdates()
+    updates.append(node_name="data_loader",
+                   hyperparameter="batch_size",
+                   value_range=[16, 512],
+                   default_value=32)
+    updates.append(node_name="lr_scheduler",
+                   hyperparameter="CosineAnnealingLR:T_max",
+                   value_range=[50, 60],
+                   default_value=55)
+    updates.append(node_name='network_backbone',
+                   hyperparameter='ResNetBackbone:dropout',
+                   value_range=[0, 0.5],
+                   default_value=0.2)
+    return updates
+
+
+if __name__ == '__main__':
+    ############################################################################
+    # Data Loading
+    # ============
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
+
+    ############################################################################
+    # Build and fit a classifier
+    # ==========================
+    api = TabularClassificationTask(
+        delete_tmp_folder_after_terminate=False,
+        search_space_updates=get_search_space_updates()
+    )
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        total_walltime_limit=500,
+        func_eval_time_limit=50
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    print(api.show_models())
@@ -0,0 +1,116 @@
+"""
+======================
+Tabular Regression
+======================
+
+The following example shows how to fit a sample regression model
+with AutoPyTorch
+"""
+import os
+import tempfile as tmp
+import typing
+import warnings
+
+from sklearn.datasets import make_regression
+
+from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+from sklearn import model_selection, preprocessing
+
+from autoPyTorch.api.tabular_regression import TabularRegressionTask
+from autoPyTorch.datasets.tabular_dataset import TabularDataset
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+
+
+def get_search_space_updates():
+    """
+    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
+    Returns:
+        HyperparameterSearchSpaceUpdates
+    """
+    updates = HyperparameterSearchSpaceUpdates()
+    updates.append(node_name="data_loader",
+                   hyperparameter="batch_size",
+                   value_range=[16, 512],
+                   default_value=32)
+    updates.append(node_name="lr_scheduler",
+                   hyperparameter="CosineAnnealingLR:T_max",
+                   value_range=[50, 60],
+                   default_value=55)
+    updates.append(node_name='network_backbone',
+                   hyperparameter='ResNetBackbone:dropout',
+                   value_range=[0, 0.5],
+                   default_value=0.2)
+    return updates
+
+
+if __name__ == '__main__':
+    ############################################################################
+    # Data Loading
+    # ============
+
+    # Get the training data for tabular regression
+    # X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True)
+
+    # Use dummy data for now since there are problems with categorical columns
+    X, y = make_regression(
+        n_samples=5000,
+        n_features=4,
+        n_informative=3,
+        n_targets=1,
+        shuffle=True,
+        random_state=0
+    )
+
+    X_train, X_test, y_train, y_test = model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
+
+    # Scale the regression targets to have zero mean and unit variance.
+    # This is important for Neural Networks since predicting large target values would require very large weights.
+    # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean
+    y_train_mean = y_train.mean()
+    y_train_std = y_train.std()
+
+    y_train_scaled = (y_train - y_train_mean) / y_train_std
+    y_test_scaled = (y_test - y_train_mean) / y_train_std
+
+    ############################################################################
+    # Build and fit a regressor
+    # ==========================
+    api = TabularRegressionTask(
+        delete_tmp_folder_after_terminate=False,
+        search_space_updates=get_search_space_updates()
+    )
+    api.search(
+        X_train=X_train,
+        y_train=y_train_scaled,
+        X_test=X_test.copy(),
+        y_test=y_test_scaled.copy(),
+        optimize_metric='r2',
+        total_walltime_limit=500,
+        func_eval_time_limit=50,
+        traditional_per_total_budget=0
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred_scaled = api.predict(X_test)
+
+    # Rescale the Neural Network predictions into the original target range
+    y_pred = y_pred_scaled * y_train_std + y_train_mean
+    score = api.score(y_pred, y_test)
+
+    print(score)
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Tabular Regression\n\nThe following example shows how to fit a sample regression model\nwith AutoPyTorch\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nfrom sklearn.datasets import make_regression\n\nfrom autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom sklearn import model_selection, preprocessing\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n    \"\"\"\n    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n    Returns:\n        HyperparameterSearchSpaceUpdates\n    \"\"\"\n    updates = HyperparameterSearchSpaceUpdates()\n    updates.append(node_name=\"data_loader\",\n                   hyperparameter=\"batch_size\",\n                   value_range=[16, 512],\n                   default_value=32)\n    updates.append(node_name=\"lr_scheduler\",\n                   hyperparameter=\"CosineAnnealingLR:T_max\",\n                   value_range=[50, 60],\n                   default_value=55)\n    updates.append(node_name='network_backbone',\n                   hyperparameter='ResNetBackbone:dropout',\n                   value_range=[0, 0.5],\n                   default_value=0.2)\n    return updates\n\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n\n    # Get the training data for tabular regression\n    # X, y = datasets.fetch_openml(name=\"cholesterol\", return_X_y=True)\n\n    # Use dummy data for now since there are problems with categorical 
columns\n    X, y = make_regression(\n        n_samples=5000,\n        n_features=4,\n        n_informative=3,\n        n_targets=1,\n        shuffle=True,\n        random_state=0\n    )\n\n    X_train, X_test, y_train, y_test = model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    # Scale the regression targets to have zero mean and unit variance.\n    # This is important for Neural Networks since predicting large target values would require very large weights.\n    # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n    y_train_mean = y_train.mean()\n    y_train_std = y_train.std()\n\n    y_train_scaled = (y_train - y_train_mean) / y_train_std\n    y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n    ############################################################################\n    # Build and fit a regressor\n    # ==========================\n    api = TabularRegressionTask(\n        delete_tmp_folder_after_terminate=False,\n        search_space_updates=get_search_space_updates()\n    )\n    api.search(\n        X_train=X_train,\n        y_train=y_train_scaled,\n        X_test=X_test.copy(),\n        y_test=y_test_scaled.copy(),\n        optimize_metric='r2',\n        total_walltime_limit=500,\n        func_eval_time_limit=50,\n        traditional_per_total_budget=0\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred_scaled = api.predict(X_test)\n\n    # Rescale the Neural Network predictions into the original target range\n    y_pred = y_pred_scaled * y_train_std + y_train_mean\n    score = api.score(y_pred, y_test)\n\n    print(score)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -0,0 +1,54 @@
+"""
+======================
+Image Classification
+======================
+"""
+import numpy as np
+
+import sklearn.model_selection
+
+import torchvision.datasets
+
+from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline
+
+# Get the training data for image classification
+trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)
+data = trainset.data.numpy()
+data = np.expand_dims(data, axis=3)
+# Create a proof of concept pipeline!
+dataset_properties = dict()
+pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)
+
+# Train and validation split
+train_indices, val_indices = sklearn.model_selection.train_test_split(
+    list(range(data.shape[0])),
+    random_state=1,
+    test_size=0.25,
+)
+
+# Configuration space
+pipeline_cs = pipeline.get_hyperparameter_search_space()
+print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}")
+config = pipeline_cs.sample_configuration()
+print("Pipeline Random Config:\n", '_' * 40, f"\n{config}")
+pipeline.set_hyperparameters(config)
+
+# Fit the pipeline
+print("Fitting the pipeline...")
+
+pipeline.fit(X=dict(X_train=data,
+                    is_small_preprocess=True,
+                    dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]),
+                                            std=np.array([np.std(data[:, :, :, i]) for i in range(1)]),
+                                            num_classes=10,
+                                            num_features=data.shape[1] * data.shape[2],
+                                            image_height=data.shape[1],
+                                            image_width=data.shape[2],
+                                            is_small_preprocess=True),
+                    train_indices=train_indices,
+                    val_indices=val_indices,
+                    )
+             )
+
+# Showcase some components of the pipeline
+print(pipeline)
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Image Classification\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n\nimport sklearn.model_selection\n\nimport torchvision.datasets\n\nfrom autoPyTorch.pipeline.image_classification import ImageClassificationPipeline\n\n# Get the training data for image classification\ntrainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)\ndata = trainset.data.numpy()\ndata = np.expand_dims(data, axis=3)\n# Create a proof of concept pipeline!\ndataset_properties = dict()\npipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)\n\n# Train and validation split\ntrain_indices, val_indices = sklearn.model_selection.train_test_split(\n    list(range(data.shape[0])),\n    random_state=1,\n    test_size=0.25,\n)\n\n# Configuration space\npipeline_cs = pipeline.get_hyperparameter_search_space()\nprint(\"Pipeline CS:\\n\", '_' * 40, f\"\\n{pipeline_cs}\")\nconfig = pipeline_cs.sample_configuration()\nprint(\"Pipeline Random Config:\\n\", '_' * 40, f\"\\n{config}\")\npipeline.set_hyperparameters(config)\n\n# Fit the pipeline\nprint(\"Fitting the pipeline...\")\n\npipeline.fit(X=dict(X_train=data,\n                    is_small_preprocess=True,\n                    dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]),\n                                            std=np.array([np.std(data[:, :, :, i]) for i in range(1)]),\n                                            num_classes=10,\n                                            num_features=data.shape[1] * data.shape[2],\n                                            image_height=data.shape[1],\n                                            image_width=data.shape[2],\n                                            is_small_preprocess=True),\n                    train_indices=train_indices,\n                    val_indices=val_indices,\n                    )\n             )\n\n# Showcase some components of the pipeline\nprint(pipeline)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}