
Commit a676c2b

Update for release (#335)
* Create release workflow and CITATION.cff and update README, setup.py
* fix bug in pypy token
* fix documentation formatting
* TODO for docker image
* accept suggestions from shuhei
* add further options for disable_file_output documentation
* remove from release.yml
1 parent 1e06cce commit a676c2b

File tree

8 files changed: +157 −11 lines changed

.github/workflows/release.yml

Lines changed: 33 additions & 0 deletions

```yaml
name: Push to PyPi

on:
  push:
    branches:
      - master

jobs:
  test:
    runs-on: "ubuntu-latest"

    steps:
    - name: Checkout source
      uses: actions/checkout@v2

    - name: Set up Python 3.8
      uses: actions/setup-python@v1
      with:
        python-version: 3.8

    - name: Install build dependencies
      run: python -m pip install build wheel

    - name: Build distributions
      shell: bash -l {0}
      run: python setup.py sdist bdist_wheel

    - name: Publish package to PyPI
      if: github.repository == 'automl/Auto-PyTorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
      uses: pypa/gh-action-pypi-publish@master
      with:
        user: __token__
        password: ${{ secrets.pypi_token }}
```
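The `if:` guard on the publish step gates uploads on repository, event, and ref. A minimal Python sketch of that condition (the `should_publish` function is illustrative, not part of the workflow):

```python
def should_publish(repository: str, event_name: str, ref: str) -> bool:
    """Mirror the workflow's `if:` expression: publish only when a tag
    is pushed to the automl/Auto-PyTorch repository itself."""
    return (
        repository == "automl/Auto-PyTorch"
        and event_name == "push"
        and ref.startswith("refs/tags")
    )

# A tag push publishes; a plain branch push or a fork does not.
print(should_publish("automl/Auto-PyTorch", "push", "refs/tags/v0.1.0"))   # True
print(should_publish("automl/Auto-PyTorch", "push", "refs/heads/master"))  # False
```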

CITATION.cff

Lines changed: 19 additions & 0 deletions

```yaml
preferred-citation:
  type: article
  authors:
    - family-names: "Zimmer"
      given-names: "Lucas"
      affiliation: "University of Freiburg, Germany"
    - family-names: "Lindauer"
      given-names: "Marius"
      affiliation: "University of Freiburg, Germany"
    - family-names: "Hutter"
      given-names: "Frank"
      affiliation: "University of Freiburg, Germany"
  doi: "10.1109/TPAMI.2021.3067763"
  journal-title: "IEEE Transactions on Pattern Analysis and Machine Intelligence"
  title: "Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL"
  year: 2021
  note: "also available under https://arxiv.org/abs/2006.13799"
  start: 3079
  end: 3090
```
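For illustration, the fields above can be joined into a one-line reference. This sketch hard-codes the metadata as a plain dict rather than parsing the CFF file; `format_reference` is a hypothetical helper, not part of any tooling:

```python
# Metadata hand-copied from the CITATION.cff entry above.
citation = {
    "authors": ["Lucas Zimmer", "Marius Lindauer", "Frank Hutter"],
    "title": "Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL",
    "journal": "IEEE Transactions on Pattern Analysis and Machine Intelligence",
    "year": 2021,
    "pages": (3079, 3090),
    "doi": "10.1109/TPAMI.2021.3067763",
}

def format_reference(c: dict) -> str:
    """Join the CFF fields into a one-line journal reference string."""
    authors = ", ".join(c["authors"])
    start, end = c["pages"]
    title = c["title"]
    journal = c["journal"]
    return f'{authors}. "{title}." {journal} ({c["year"]}): {start}-{end}. doi:{c["doi"]}'

print(format_reference(citation))
```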

README.md

Lines changed: 52 additions & 8 deletions

```diff
@@ -1,14 +1,14 @@
 # Auto-PyTorch

-Copyright (C) 2019 [AutoML Group Freiburg](http://www.automl.org/)
+Copyright (C) 2021 [AutoML Groups Freiburg and Hannover](http://www.automl.org/)

-This an alpha version of Auto-PyTorch with improved API.
-So far, Auto-PyTorch supports tabular data (classification, regression).
-We plan to enable image data and time-series data.
+While early AutoML frameworks focused on optimizing traditional ML pipelines and their hyperparameters, another trend in AutoML is to focus on neural architecture search. To bring the best of these two worlds together, we developed **Auto-PyTorch**, which jointly and robustly optimizes the network architecture and the training hyperparameters to enable fully automated deep learning (AutoDL).

+Auto-PyTorch is mainly developed to support tabular data (classification, regression).
+The newest features in Auto-PyTorch for tabular data are described in the paper ["Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL"](https://arxiv.org/abs/2006.13799) (see below for bibtex ref).

-Find the documentation [here](https://automl.github.io/Auto-PyTorch/development)
-
+***From v0.1.0, AutoPyTorch has been updated to further improve usability, robustness and efficiency by using SMAC as the underlying optimization package as well as changing the code structure. Therefore, moving from v0.0.2 to v0.1.0 will break compatibility.
+In case you would like to use the old API, you can find it at [`master_old`](https://github.com/automl/Auto-PyTorch/tree/master-old).***

 ## Installation

@@ -33,6 +33,50 @@ python setup.py install

 ```

+## Examples
+
+In a nutshell:
+
+```py
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+
+# data and metric imports
+import sklearn.model_selection
+import sklearn.datasets
+import sklearn.metrics
+X, y = sklearn.datasets.load_digits(return_X_y=True)
+X_train, X_test, y_train, y_test = \
+    sklearn.model_selection.train_test_split(X, y, random_state=1)
+
+# initialise Auto-PyTorch api
+api = TabularClassificationTask()
+
+# Search for an ensemble of machine learning algorithms
+api.search(
+    X_train=X_train,
+    y_train=y_train,
+    X_test=X_test,
+    y_test=y_test,
+    optimize_metric='accuracy',
+    total_walltime_limit=300,
+    func_eval_time_limit_secs=50
+)
+
+# Calculate test accuracy
+y_pred = api.predict(X_test)
+score = api.score(y_pred, y_test)
+print("Accuracy score", score)
+```
+
+For more examples, including customising the search space, parallelising the code, etc., check out the `examples` folder
+
+```sh
+$ cd examples/
+```
+
+
+Code for the [paper](https://arxiv.org/abs/2006.13799) is available under `examples/ensemble` in the [TPAMI.2021.3067763](https://github.com/automl/Auto-PyTorch/tree/TPAMI.2021.3067763) branch.
+
 ## Contributing

 If you want to contribute to Auto-PyTorch, clone the repository and checkout our current development branch

@@ -63,8 +107,8 @@ Please refer to the branch `TPAMI.2021.3067763` to reproduce the paper *Auto-PyT
   title = {Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL},
   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
   year = {2021},
-  note = {IEEE early access; also available under https://arxiv.org/abs/2006.13799},
-  pages = {1-12}
+  note = {also available under https://arxiv.org/abs/2006.13799},
+  pages = {3079 - 3090}
 }
 ```

```

autoPyTorch/api/base_task.py

Lines changed: 16 additions & 0 deletions

```diff
@@ -762,6 +762,7 @@ def _search(
         budget_type (str):
             Type of budget to be used when fitting the pipeline.
             It can be one of:
+
             + `epochs`: The training of each pipeline will be terminated after
               a number of epochs have passed. This number of epochs is determined by the
               budget argument of this method.
@@ -840,6 +841,21 @@
             Numeric precision used when loading ensemble data.
             Can be either '16', '32' or '64'.
         disable_file_output (Union[bool, List]):
+            If True, disable model and prediction output.
+            Can also be used as a list to pass more fine-grained
+            information on what to save. Allowed elements in the list are:
+
+            + `y_optimization`:
+                do not save the predictions for the optimization set,
+                which would later on be used to build an ensemble. Note that SMAC
+                optimizes a metric evaluated on the optimization set.
+            + `pipeline`:
+                do not save any individual pipeline files
+            + `pipelines`:
+                In case of cross validation, disables saving the joint model of the
+                pipelines fit on each fold.
+            + `y_test`:
+                do not save the predictions for the test set.
         load_models (bool: default=True):
             Whether to load the models after fitting AutoPyTorch.
         portfolio_selection (Optional[str]):
```
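The same `disable_file_output` elements are accepted by the tabular classification and regression APIs. As a rough illustration of the documented contract, here is a sketch that pre-validates the argument before handing it to `search`; the `validate_disable_file_output` helper is hypothetical and not part of Auto-PyTorch:

```python
# Allowed list elements, per the docstring above.
ALLOWED = {"y_optimization", "pipeline", "pipelines", "y_test"}

def validate_disable_file_output(value):
    """Hypothetical helper: accept a bool, or a list of known element names."""
    if isinstance(value, bool):
        return value
    unknown = set(value) - ALLOWED
    if unknown:
        raise ValueError(f"Unknown disable_file_output elements: {sorted(unknown)}")
    return list(value)

# Keep ensemble-building predictions but skip saving individual pipelines
# and test-set predictions.
print(validate_disable_file_output(["pipeline", "y_test"]))
```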

autoPyTorch/api/tabular_classification.py

Lines changed: 16 additions & 0 deletions

```diff
@@ -159,6 +159,7 @@ def search(
         budget_type (str):
             Type of budget to be used when fitting the pipeline.
             It can be one of:
+
             + `epochs`: The training of each pipeline will be terminated after
               a number of epochs have passed. This number of epochs is determined by the
               budget argument of this method.
@@ -237,6 +238,21 @@
             Numeric precision used when loading ensemble data.
             Can be either '16', '32' or '64'.
         disable_file_output (Union[bool, List]):
+            If True, disable model and prediction output.
+            Can also be used as a list to pass more fine-grained
+            information on what to save. Allowed elements in the list are:
+
+            + `y_optimization`:
+                do not save the predictions for the optimization set,
+                which would later on be used to build an ensemble. Note that SMAC
+                optimizes a metric evaluated on the optimization set.
+            + `pipeline`:
+                do not save any individual pipeline files
+            + `pipelines`:
+                In case of cross validation, disables saving the joint model of the
+                pipelines fit on each fold.
+            + `y_test`:
+                do not save the predictions for the test set.
         load_models (bool: default=True):
             Whether to load the models after fitting AutoPyTorch.
         portfolio_selection (Optional[str]):
```

autoPyTorch/api/tabular_regression.py

Lines changed: 18 additions & 2 deletions

```diff
@@ -160,6 +160,7 @@ def search(
         budget_type (str):
             Type of budget to be used when fitting the pipeline.
             It can be one of:
+
             + `epochs`: The training of each pipeline will be terminated after
               a number of epochs have passed. This number of epochs is determined by the
               budget argument of this method.
@@ -173,15 +174,15 @@ def search(
             is used, min_budget will refer to epochs whereas if budget_type=='runtime' then
             min_budget will refer to seconds.
         min_budget (int):
-            Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>_` to
+            Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
             trade-off resources between running many pipelines at min_budget and
             running the top performing pipelines on max_budget.
             min_budget states the minimum resource allocation a pipeline should have
             so that we can compare and quickly discard bad performing models.
             For example, if the budget_type is epochs, and min_budget=5, then we will
             run every pipeline to a minimum of 5 epochs before performance comparison.
         max_budget (int):
-            Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>_` to
+            Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
             trade-off resources between running many pipelines at min_budget and
             running the top performing pipelines on max_budget.
             max_budget states the maximum resource allocation a pipeline is going to
@@ -238,6 +239,21 @@
             Numeric precision used when loading ensemble data.
             Can be either '16', '32' or '64'.
         disable_file_output (Union[bool, List]):
+            If True, disable model and prediction output.
+            Can also be used as a list to pass more fine-grained
+            information on what to save. Allowed elements in the list are:
+
+            + `y_optimization`:
+                do not save the predictions for the optimization set,
+                which would later on be used to build an ensemble. Note that SMAC
+                optimizes a metric evaluated on the optimization set.
+            + `pipeline`:
+                do not save any individual pipeline files
+            + `pipelines`:
+                In case of cross validation, disables saving the joint model of the
+                pipelines fit on each fold.
+            + `y_test`:
+                do not save the predictions for the test set.
         load_models (bool: default=True):
             Whether to load the models after fitting AutoPyTorch.
         portfolio_selection (Optional[str]):
```
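The docstring's example (budget_type epochs, min_budget=5) can be made concrete with a small sketch of the budget ladder Hyperband builds between min_budget and max_budget. The `budget_ladder` helper and the `eta=3` multiplier are illustrative assumptions; the real schedule is managed internally by SMAC, not by user code:

```python
def budget_ladder(min_budget, max_budget, eta=3):
    """Successive-halving rungs from min_budget up to max_budget,
    each rung eta times larger than the last. Many pipelines run at
    the lowest rung; only the top performers advance to larger budgets."""
    budgets = [min_budget]
    while budgets[-1] * eta <= max_budget:
        budgets.append(budgets[-1] * eta)
    return budgets

# With budget_type='epochs', min_budget=5 and max_budget=50:
print(budget_ladder(5, 50))  # [5, 15, 45]
```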

docs/extending.rst

Lines changed: 2 additions & 0 deletions

```diff
@@ -5,3 +5,5 @@
 ======================
 Extending Auto-PyTorch
 ======================
+
+TODO
```

setup.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -23,7 +23,7 @@
     name="autoPyTorch",
     version="0.1.0",
     author="AutoML Freiburg",
-    author_email="[email protected]",
+    author_email="[email protected]",
     description=("Auto-PyTorch searches neural architectures using smac"),
     long_description=long_description,
     url="https://github.com/automl/Auto-PyTorch",
```
