From d60c064ca9fa787b9af67bf9609cae2811246eea Mon Sep 17 00:00:00 2001 From: Ramesht Shukla <36106177+ramesht007@users.noreply.github.com> Date: Wed, 7 Oct 2020 11:10:59 +0530 Subject: [PATCH 1/7] Initial commit --- .github/workflows/hvd-tests.yml | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/hvd-tests.yml diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml new file mode 100644 index 000000000000..8106ffc91411 --- /dev/null +++ b/.github/workflows/hvd-tests.yml @@ -0,0 +1,48 @@ +name: Run Horovod tests +on: [push, pull_request] + +jobs: + horovod-tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7] + + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + + ## Install Horovod + sudo apt-get update + python -m pip install --upgrade pip + pip install horovod + + ## Install gsutil + echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list + sudo apt-get install -y apt-transport-https ca-certificates gnupg curl + curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - + sudo apt-get update && sudo apt-get install -y google-cloud-sdk + + ## Install openblas and mkl + sudo apt-get install -y libopenblas-dev libomp5 + pip install mkl + + ## Download torch & xla + gsutil cp gs://tpu-pytorch/wheels/torch-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl . + gsutil cp gs://tpu-pytorch/wheels/torch_xla-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl . + + ## Install torch & xla + pip install torch-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl + pip install torch_xla-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl + + ## Install test deps and Ignite + pip install -r requirements-dev.txt + python setup.py install + + From a184b055970110abc8f869155bd04f72ff136c5f Mon Sep 17 00:00:00 2001 From: Ramesht Shukla <36106177+ramesht007@users.noreply.github.com> Date: Wed, 7 Oct 2020 11:24:40 +0530 Subject: [PATCH 2/7] Update hvd-tests.yml --- .github/workflows/hvd-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml index 8106ffc91411..310b376fdbb6 100644 --- a/.github/workflows/hvd-tests.yml +++ b/.github/workflows/hvd-tests.yml @@ -6,7 +6,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7] + python-version: [3.6] + xla-version: [nightly, ] steps: From 8789a8c9a140ebc9ee04e68061de8440c8a788c1 Mon Sep 17 00:00:00 2001 From: ramesht007 Date: Thu, 8 Oct 2020 16:07:47 +0530 Subject: [PATCH 3/7] Update hvd-tests.yml --- .github/workflows/hvd-tests.yml | 105 +++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 28 deletions(-) diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml index 310b376fdbb6..07d9f84a8fa1 100644 --- a/.github/workflows/hvd-tests.yml +++ b/.github/workflows/hvd-tests.yml @@ -6,44 +6,93 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] - xla-version: [nightly, ] + python-version: [3.7] + pytorch-channel: [pytorch, pytorch-nightly] steps: - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + - name: Setup Miniconda + uses: goanpeca/setup-miniconda@v1 with: + miniconda-version: "latest" python-version: ${{ matrix.python-version }} + - name: Install dependencies - run: | - + shell: bash -l {0} + run: | ## Install Horovod sudo apt-get update python -m pip install --upgrade pip pip install horovod - - ## Install gsutil - echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list - sudo apt-get install -y apt-transport-https ca-certificates gnupg curl - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - - sudo apt-get update && sudo apt-get install -y google-cloud-sdk - - ## Install openblas and mkl - sudo apt-get install -y libopenblas-dev libomp5 - pip install mkl - - ## Download torch & xla - gsutil cp gs://tpu-pytorch/wheels/torch-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl . - gsutil cp gs://tpu-pytorch/wheels/torch_xla-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl . - - ## Install torch & xla - pip install torch-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl - pip install torch_xla-${{ matrix.xla-version }}-cp36-cp36m-linux_x86_64.whl - - ## Install test deps and Ignite + + #install other dependencies + conda install pytorch torchvision cpuonly -c ${{ matrix.pytorch-channel }} pip install -r requirements-dev.txt + # Fixes #1153 + pip install --upgrade scipy==1.4.1 python setup.py install - - + + - name: Run Tests + shell: bash -l {0} + run: | + SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh + - name: Run MNIST Examples + shell: bash -l {0} + run: | + # MNIST + # 1) mnist.py + python examples/mnist/mnist.py --epochs=1 + - name: Run MNIST with loggers Examples + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + # 2) mnist_with_visdom.py + python -c "from visdom.server import download_scripts; download_scripts()" # download scripts : https://github.com/facebookresearch/visdom/blob/master/py/server.py#L929 + python -m visdom.server & + sleep 10 + python examples/mnist/mnist_with_visdom.py --epochs=1 + kill %1 + # 3.1) mnist_with_tensorboard.py with tbX + python examples/mnist/mnist_with_tensorboard.py --epochs=1 + # 3.2) mnist_with_tensorboard.py with native torch tb + pip uninstall -y tensorboardX + python examples/mnist/mnist_with_tensorboard.py --epochs=1 + - name: Run MNIST Example With Crash + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + continue-on-error: true + run: | + # 4) mnist_save_resume_engine.py + python examples/mnist/mnist_save_resume_engine.py --epochs=2 --crash_iteration 1100 + + - name: Resume MNIST from previous crash + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt + - name: Run GAN example + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + # DCGAN + python examples/gan/dcgan.py --dataset fake --dataroot /tmp/fakedata --output-dir /tmp/outputs-dcgan --batch-size 2 --epochs 2 --workers 0 + + - name: Run RL Examples + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + # RL + # 1) Actor-Critic + python examples/reinforcement_learning/actor_critic.py --max-episodes=2 + # 2) Reinforce + python examples/reinforcement_learning/reinforce.py --max-episodes=2 + - name: Run Neural Style Example + shell: bash -l {0} + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + #fast-neural-style + #train + mkdir -p ~/.cache/torch/checkpoints/ && wget "https://download.pytorch.org/models/vgg16-397923af.pth" -O ~/.cache/torch/checkpoints/vgg16-397923af.pth + python examples/fast_neural_style/neural_style.py train --epochs 1 --cuda 0 --dataset test --dataroot . --image_size 32 --style_image examples/fast_neural_style/images/style_images/mosaic.jpg --style_size 32 + From 103833bd21c9003f0f884e61986a432cb07b51c3 Mon Sep 17 00:00:00 2001 From: ramesht007 Date: Thu, 8 Oct 2020 16:14:35 +0530 Subject: [PATCH 4/7] Update hvd-tests.yml --- .github/workflows/hvd-tests.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml index 07d9f84a8fa1..73cd174a4af3 100644 --- a/.github/workflows/hvd-tests.yml +++ b/.github/workflows/hvd-tests.yml @@ -7,7 +7,7 @@ jobs: strategy: matrix: python-version: [3.7] - pytorch-channel: [pytorch, pytorch-nightly] + pytorch-channel: [pytorch] steps: @@ -21,28 +21,26 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - ## Install Horovod - sudo apt-get update - python -m pip install --upgrade pip - pip install horovod - #install other dependencies conda install pytorch torchvision cpuonly -c ${{ matrix.pytorch-channel }} pip install -r requirements-dev.txt # Fixes #1153 pip install --upgrade scipy==1.4.1 + pip install horovod python setup.py install - name: Run Tests shell: bash -l {0} run: | SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh + - name: Run MNIST Examples shell: bash -l {0} run: | # MNIST # 1) mnist.py python examples/mnist/mnist.py --epochs=1 + - name: Run MNIST with loggers Examples shell: bash -l {0} if: ${{ matrix.os == 'ubuntu-latest' }} @@ -58,6 +56,7 @@ jobs: # 3.2) mnist_with_tensorboard.py with native torch tb pip uninstall -y tensorboardX python examples/mnist/mnist_with_tensorboard.py --epochs=1 + - name: Run MNIST Example With Crash shell: bash -l {0} if: ${{ matrix.os == 'ubuntu-latest' }} @@ -71,6 +70,7 @@ jobs: if: ${{ matrix.os == 'ubuntu-latest' }} run: | python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt + - name: Run GAN example shell: bash -l {0} if: ${{ matrix.os == 'ubuntu-latest' }} @@ -87,6 +87,7 @@ jobs: python examples/reinforcement_learning/actor_critic.py --max-episodes=2 # 2) Reinforce python examples/reinforcement_learning/reinforce.py --max-episodes=2 + - name: Run Neural Style Example shell: bash -l {0} if: ${{ matrix.os == 'ubuntu-latest' }} From 484651da831dd72204dd36ecdc06b944646d4be3 Mon Sep 17 00:00:00 2001 From: ramesht007 Date: Thu, 8 Oct 2020 18:05:56 +0530 Subject: [PATCH 5/7] trigger GitHub actions From bfd962bd497c3a4ab4f1cfc5e1c649b5846b5181 Mon Sep 17 00:00:00 2001 From: ramesht007 Date: Fri, 9 Oct 2020 11:12:11 +0530 Subject: [PATCH 6/7] removed examples --- .github/workflows/hvd-tests.yml | 65 +-------------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml index 73cd174a4af3..3fdc4ef0d218 100644 --- a/.github/workflows/hvd-tests.yml +++ b/.github/workflows/hvd-tests.yml @@ -32,68 +32,5 @@ jobs: - name: Run Tests shell: bash -l {0} run: | - SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh - - - name: Run MNIST Examples - shell: bash -l {0} - run: | - # MNIST - # 1) mnist.py - python examples/mnist/mnist.py --epochs=1 - - - name: Run MNIST with loggers Examples - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - # 2) mnist_with_visdom.py - python -c "from visdom.server import download_scripts; download_scripts()" # download scripts : https://github.com/facebookresearch/visdom/blob/master/py/server.py#L929 - python -m visdom.server & - sleep 10 - python examples/mnist/mnist_with_visdom.py --epochs=1 - kill %1 - # 3.1) mnist_with_tensorboard.py with tbX - python examples/mnist/mnist_with_tensorboard.py --epochs=1 - # 3.2) mnist_with_tensorboard.py with native torch tb - pip uninstall -y tensorboardX - python examples/mnist/mnist_with_tensorboard.py --epochs=1 - - - name: Run MNIST Example With Crash - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - continue-on-error: true - run: | - # 4) mnist_save_resume_engine.py - python examples/mnist/mnist_save_resume_engine.py --epochs=2 --crash_iteration 1100 - - - name: Resume MNIST from previous crash - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - python examples/mnist/mnist_save_resume_engine.py --epochs=2 --resume_from=/tmp/mnist_save_resume/checkpoint_1.pt - - - name: Run GAN example - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - # DCGAN - python examples/gan/dcgan.py --dataset fake --dataroot /tmp/fakedata --output-dir /tmp/outputs-dcgan --batch-size 2 --epochs 2 --workers 0 - - - name: Run RL Examples - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - # RL - # 1) Actor-Critic - python examples/reinforcement_learning/actor_critic.py --max-episodes=2 - # 2) Reinforce - python examples/reinforcement_learning/reinforce.py --max-episodes=2 - - - name: Run Neural Style Example - shell: bash -l {0} - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - #fast-neural-style - #train - mkdir -p ~/.cache/torch/checkpoints/ && wget "https://download.pytorch.org/models/vgg16-397923af.pth" -O ~/.cache/torch/checkpoints/vgg16-397923af.pth - python examples/fast_neural_style/neural_style.py train --epochs 1 --cuda 0 --dataset test --dataroot . --image_size 32 --style_image examples/fast_neural_style/images/style_images/mosaic.jpg --style_size 32 + SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} CI_PYTHON_VERSION="${{ matrix.python-version }}" sh tests/run_cpu_tests.sh From c3e11b3f46c938d3c52a19b4ebf680cc560c07be Mon Sep 17 00:00:00 2001 From: ramesht007 Date: Fri, 9 Oct 2020 12:41:10 +0530 Subject: [PATCH 7/7] trigger GitHub actions