Skip to content

Commit add7596

Browse files
authored
[Nova] GHA Linux GPU Job (#6804)
* [Nova] GHA Linux GPU Job
* Increase the timeout, since jobs were timing out and being cancelled after 30 minutes
* Use a bigger instance and a longer timeout
* Use an instance with more GPU memory, and run only py38 and cu116 on PR CI
1 parent c84dbfa commit add7596

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

.github/workflows/test-linux-cpu.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
tests:
1717
strategy:
1818
matrix:
19-
py_vers: ["3.7", "3.8", "3.9", "3.10"]
19+
python_version: ["3.7", "3.8", "3.9", "3.10"]
2020
fail-fast: false
2121
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
2222
with:
@@ -27,7 +27,7 @@ jobs:
2727
git config --global --add safe.directory /__w/vision/vision
2828
2929
# Set up Environment Variables
30-
export PYTHON_VERSION="${{ matrix.py_vers }}"
30+
export PYTHON_VERSION="${{ matrix.python_version }}"
3131
export VERSION="cpu"
3232
export CUDATOOLKIT="cpuonly"
3333

.github/workflows/test-linux-gpu.yml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Runs the unit tests on a Linux GPU runner by calling the shared
# linux_job.yml workflow from pytorch/test-infra with the inputs listed below.
name: Unit-tests on Linux GPU
2+
3+
# Triggers: every pull request, pushes to the nightly / main / release/*
# branches, and manual dispatch.
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- nightly
8+
- main
9+
- release/*
10+
workflow_dispatch:
11+
12+
# NOTE(review): the job script exports CHANNEL itself (test or nightly), so
# confirm whether this workflow-level value is actually consumed anywhere.
env:
13+
CHANNEL: "nightly"
14+
15+
jobs:
16+
tests:
17+
strategy:
18+
# A single Python / CUDA combination is listed; per the commit message only
# py38 and cu116 are run on PR CI.
matrix:
19+
python_version: ["3.8"]
20+
cuda_arch_version: ["11.6"]
21+
fail-fast: false
22+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
23+
with:
24+
# Per the commit message, this instance was chosen for more GPU memory.
runner: linux.g5.4xlarge.nvidia.gpu
25+
repository: pytorch/vision
26+
gpu-arch-type: cuda
27+
gpu-arch-version: ${{ matrix.cuda_arch_version }}
28+
# Raised because jobs were timing out and being cancelled after 30 minutes
# (commit message). Unit is presumably minutes — TODO confirm against linux_job.yml.
timeout: 120
29+
script: |
30+
# Mark Build Directory Safe
# Registers /__w/vision/vision as a git safe.directory in the global config.
# NOTE(review): presumably required because the checkout is owned by a
# different user than the one running the script — confirm.
31+
git config --global --add safe.directory /__w/vision/vision
32+
33+
# Set up Environment Variables
# PYTHON_VERSION and VERSION are filled in from the job matrix; CUDATOOLKIT is
# the "pytorch-cuda=<VERSION>" package spec passed to conda install later.
34+
export PYTHON_VERSION="${{ matrix.python_version }}"
35+
export VERSION="${{ matrix.cuda_arch_version }}"
36+
export CUDATOOLKIT="pytorch-cuda=${VERSION}"
37+
38+
# Set CHANNEL
# "test" is used for pull requests whose base branch name starts with
# "release" and for refs starting with "refs/heads/release"; everything else
# uses "nightly". The trailing unquoted * makes the comparison a prefix match.
39+
if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then
40+
export CHANNEL=test
41+
else
42+
export CHANNEL=nightly
43+
fi
44+
45+
# Create Conda Env
# Create the environment as a prefix (-p) under the current working directory
# and activate it through the very same path. Using ${PWD} for both commands
# keeps them in agreement wherever the job's working directory is, instead of
# relying on it being /work.
46+
conda create -yp "${PWD}/ci_env" python="${PYTHON_VERSION}" numpy libpng jpeg scipy
47+
conda activate "${PWD}/ci_env"
48+
49+
# Install PyTorch, Torchvision, and testing libraries
# From here on, trace every command (-x) and abort on the first failure (-e).
# The PyTorch match spec is quoted as a single word so the shell never treats
# the [build=...] part as a glob bracket expression; conda always receives the
# literal spec, regardless of files in the working directory or of the
# nullglob / failglob shell options.
50+
set -ex
51+
conda install \
52+
--yes \
53+
-c "pytorch-${CHANNEL}" \
54+
-c nvidia "pytorch-${CHANNEL}::pytorch[build=*${VERSION}*]" \
55+
"${CUDATOOLKIT}"
56+
python3 setup.py develop
57+
python3 -m pip install pytest pytest-mock 'av<10'
58+
59+
# Run Tests
# First print the environment report from torch.utils.collect_env, then run
# pytest verbosely, writing a JUnit XML report to test-results/junit.xml and
# passing --durations 20 (pytest's slowest-tests report).
60+
python3 -m torch.utils.collect_env
61+
python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20

0 commit comments

Comments
 (0)