Skip to content

Commit a9269ae

Browse files
authored
Add nightly testing to CUDA CI (#18078)
1 parent 281d6a2 commit a9269ae

File tree

3 files changed

+44
-8
lines changed

.azure/gpu-tests-fabric.yml

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,23 @@ jobs:
5353
FREEZE_REQUIREMENTS: "1"
5454
PIP_CACHE_DIR: "/var/tmp/pip"
5555
container:
56-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
56+
image: $(image)
5757
# default shm size is 64m. Increase it to avoid:
5858
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
5959
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
6060
strategy:
6161
matrix:
62-
'pkg: Fabric':
62+
'Fabric | latest':
63+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
64+
IS_NIGHTLY: "false"
6365
PACKAGE_NAME: "fabric"
64-
'pkg: Lightning':
66+
'Lightning | latest':
67+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
68+
IS_NIGHTLY: "false"
69+
PACKAGE_NAME: "lightning"
70+
'Lightning | nightly':
71+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
72+
IS_NIGHTLY: "true"
6573
PACKAGE_NAME: "lightning"
6674
workspace:
6775
clean: all
@@ -80,6 +88,7 @@ jobs:
8088
echo $CUDA_VISIBLE_DEVICES
8189
echo $CUDA_VERSION_MM
8290
echo $TORCH_URL
91+
echo $(IS_NIGHTLY)
8392
echo $COVERAGE_SOURCE
8493
whereis nvidia
8594
nvidia-smi
@@ -96,13 +105,23 @@ jobs:
96105
for fpath in `ls requirements/**/*.txt`; do \
97106
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
98107
done
108+
# without succeeded this could run even if the job has already failed
109+
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
99110
displayName: 'Adjust dependencies'
100111
101112
- bash: |
102113
extra=$(python -c "print({'lightning': 'fabric-'}.get('$(PACKAGE_NAME)', ''))")
103114
pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL}
104115
displayName: 'Install package & dependencies'
105116
117+
- bash: |
118+
pip uninstall -y torch torchvision
119+
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM%}
120+
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
121+
# without succeeded this could run even if the job has already failed
122+
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
123+
displayName: 'Bump to nightly'
124+
106125
- bash: |
107126
set -e
108127
python requirements/collect_env_details.py

.azure/gpu-tests-pytorch.yml

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,15 @@ jobs:
5353
matrix:
5454
'PyTorch | latest':
5555
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
56-
scope: ""
56+
IS_NIGHTLY: "false"
5757
PACKAGE_NAME: "pytorch"
5858
'Lightning | latest':
5959
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
60-
scope: ""
60+
IS_NIGHTLY: "false"
61+
PACKAGE_NAME: "lightning"
62+
'Lightning | nightly':
63+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1"
64+
IS_NIGHTLY: "true"
6165
PACKAGE_NAME: "lightning"
6266
pool: lit-rtx-3090
6367
variables:
@@ -87,6 +91,7 @@ jobs:
8791
echo $CUDA_VISIBLE_DEVICES
8892
echo $CUDA_VERSION_MM
8993
echo $TORCH_URL
94+
echo $(IS_NIGHTLY)
9095
echo $COVERAGE_SOURCE
9196
whereis nvidia
9297
nvidia-smi
@@ -103,18 +108,30 @@ jobs:
103108
for fpath in `ls requirements/**/*.txt`; do \
104109
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
105110
done
106-
# prune packages with installation issues
111+
# without succeeded this could run even if the job has already failed
112+
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
113+
displayName: 'Adjust dependencies'
114+
115+
- bash: |
107116
pip install -q -r .actions/requirements.txt
108117
python .actions/assistant.py requirements_prune_pkgs \
109118
--packages="[lightning-colossalai,lightning-bagua]" \
110119
--req_files="[requirements/_integrations/strategies.txt]"
111-
displayName: 'Adjust dependencies'
120+
displayName: 'Prune packages' # these have installation issues
112121
113122
- bash: |
114123
extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))")
115124
pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL}
116125
displayName: 'Install package & dependencies'
117126
127+
- bash: |
128+
pip uninstall -y torch torchvision
129+
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM%}
130+
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
131+
# without succeeded this could run even if the job has already failed
132+
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
133+
displayName: 'Bump to nightly'
134+
118135
- bash: pip uninstall -y lightning
119136
condition: eq(variables['PACKAGE_NAME'], 'pytorch')
120137
# Lightning is dependency of Habana or other accelerators/integrations so in case we test PL we need to remove it

examples/pytorch/basics/autoencoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def _to_grid(self, images):
8181
nrow=self.nrow,
8282
padding=self.padding,
8383
normalize=self.normalize,
84-
range=self.norm_range,
84+
value_range=self.norm_range,
8585
scale_each=self.scale_each,
8686
pad_value=self.pad_value,
8787
)

0 commit comments

Comments
 (0)