Skip to content

Commit d142cdf

Browse files
q10 authored and facebook-github-bot committed
Re-enable Tests for OSS PR Builds (#1598)
Summary:
- Upgrade the FBGEMM_GPU release workflows to use the same build scripts framework for running the builds
- Update the build scripts framework to properly support running PyTests
- Disable certain tests from running in the **CPU-only build** as they are known to be failing at the moment (`jagged_tensor_ops_test`, `uvm_test`)
- Update both the nightly and release jobs to actually run tests prior to publishing to PyPI
- Fix bug with PyPI publishing for the FBGEMM_GPU-CPU nightly job
- Update the nightly build cron schedules to follow PST
- Remove some jobs from FBGEMM_GPU CI as they are now redundant with the FBGEMM_GPU nightly / release builds
- Update the FBGEMM_GPU-CPU CI job (Ubuntu) to use the build scripts framework for building and running tests

Pull Request resolved: #1598
Reviewed By: brad-mengchi
Differential Revision: D43448188
Pulled By: q10
fbshipit-source-id: 81ec5c0ee2d9c7d1246649a31f669b0ed585d23d
1 parent ea9b159 commit d142cdf

File tree

7 files changed

+522
-591
lines changed

7 files changed

+522
-591
lines changed

.github/scripts/setup_env.bash

Lines changed: 152 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,66 @@ test_env_var () {
9393
fi
9494
}
9595

96+
# Install one or more packages using the system package manager.
#
# Arguments: $@ - names of the packages to install (at least one required)
# Outputs:   progress messages on stdout
# Returns:   1 if no package names were given or neither apt-get nor yum is
#            available; otherwise the exit status of the install command as
#            reported by print_exec
install_system_packages () {
  if [ $# -le 0 ]; then
    echo "Usage: ${FUNCNAME[0]} PACKAGE_NAME ... "
    echo "Example(s):"
    echo " ${FUNCNAME[0]} miopen-hip miopen-hip-dev"
    return 1
  fi

  # Keep the command arrays local so they do not leak into the sourcing shell.
  local -a update_cmd=() install_cmd=()

  # Prefix the commands with sudo only when it is available.  `command -v` is
  # used instead of `which` so that nothing is printed to stdout.
  if command -v sudo > /dev/null; then
    update_cmd=(sudo)
    install_cmd=(sudo)
  fi

  # Use apt-get (not apt) on Debian-like systems: apt's command line interface
  # is meant for interactive use and warns when it is driven from a script.
  if command -v apt-get > /dev/null; then
    update_cmd+=(apt-get update -y)
    install_cmd+=(apt-get install -y "$@")
  elif command -v yum > /dev/null; then
    update_cmd+=(yum update -y)
    install_cmd+=(yum install -y "$@")
  else
    echo "[INSTALL] Could not find a system package installer to install packages!"
    return 1
  fi

  echo "[INSTALL] Updating system repositories ..."
  # A failed refresh is not fatal: the install below may still succeed against
  # the existing package index, so only warn here.
  print_exec "${update_cmd[@]}" \
    || echo "[INSTALL] Warning: updating system repositories failed; continuing ..."

  echo "[INSTALL] Installing system package(s): $* ..."
  # Quoted expansion keeps every argument intact (no re-splitting or globbing).
  print_exec "${install_cmd[@]}"
}
132+
133+
# Run a single Python test file with pytest inside a conda environment.
#
# Arguments: $1 - name of the conda environment
#            $2 - path of the Python test file to run
# Outputs:   a banner and a PASSED/FAILED line on stdout, plus pytest output
# Returns:   0 if pytest succeeds; 1 on missing arguments or test failure
run_python_test () {
  # NOTE(review): env_name is assigned without `local`, as in the original;
  # other code sourcing this file may read it - confirm before localizing.
  env_name="$1"
  local python_test_file="$2"

  # Both arguments are required; an empty environment name would otherwise be
  # handed straight to `conda run -n ""`.
  if [ "$env_name" == "" ] || [ "$python_test_file" == "" ]; then
    echo "Usage: ${FUNCNAME[0]} ENV_NAME PYTHON_TEST_FILE"
    echo "Example(s):"
    echo " ${FUNCNAME[0]} build_env quantize_ops_test.py"
    return 1
  fi

  echo "################################################################################"
  echo "# [$(date --utc +%FT%T.%3NZ)] Run Python Test Suite:"
  echo "# ${python_test_file}"
  echo "################################################################################"

  # -s disables output capturing; collection warnings are silenced via -W.
  if conda run -n "${env_name}" python -m pytest -v -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
    echo "[TEST] Python test suite PASSED: ${python_test_file}"
  else
    echo "[TEST] Python test suite FAILED: ${python_test_file}"
    return 1
  fi
}
155+
96156
print_system_info () {
97157
echo "################################################################################"
98158
echo "# Print System Info"
@@ -114,12 +174,7 @@ print_system_info () {
114174
print_exec cat /etc/os-release
115175

116176
echo "[INFO] Check GPU info"
117-
if which apt-get; then
118-
print_exec sudo apt-get install -y lshw
119-
else
120-
print_exec sudo yum install -y lshw
121-
fi
122-
177+
install_system_packages lshw
123178
print_exec sudo lshw -C display
124179
}
125180

@@ -412,10 +467,6 @@ install_cuda () {
412467
# Ensure that the libraries are properly installed
413468
test_filepath "${env_name}" libnvToolsExt.so || return 1
414469

415-
# LIBNVTOOLSEXT
416-
# CUDA_TOOLKIT_ROOT_DIR
417-
# print_exec conda env config vars set -n "${env_name}" CUDNN_INCLUDE_DIR="${install_path}/include" CUDNN_LIBRARY="${install_path}/lib"
418-
419470
# Print nvcc version
420471
print_exec conda run -n "${env_name}" nvcc --version
421472
echo "[INSTALL] Successfully installed CUDA ${cuda_version}"
@@ -440,9 +491,8 @@ install_cxx_compiler () {
440491
fi
441492

442493
if [ "$use_yum" != "" ]; then
443-
echo "[INSTALL] Installing C/C++ compilers through Yum ..."
444-
print_exec sudo yum update -y
445-
print_exec sudo yum install -y gcc gcc-c++
494+
echo "[INSTALL] Installing C/C++ compilers through yum ..."
495+
install_system_packages gcc gcc-c++
446496
else
447497
# Install gxx_linux-64 from main instead of cxx-compiler from conda-forge, as
448498
# the latter breaks builds:
@@ -770,6 +820,95 @@ build_fbgemm_gpu_install () {
770820
echo "[BUILD] FBGEMM-GPU build + install completed"
771821
}
772822

823+
# Install an FBGEMM-GPU wheel into a conda environment and verify that the
# package can be imported.
#
# Arguments: $1 - name of the conda environment
#            $2 - wheel (package) to install with pip
# Outputs:   a banner and progress messages on stdout
# Returns:   0 on success; 1 on missing arguments, a failed pip install, or a
#            failed import check
install_fbgemm_gpu_package () {
  # NOTE(review): env_name is assigned without `local`, as in the original;
  # other code sourcing this file may read it - confirm before localizing.
  env_name="$1"
  local package_name="$2"

  if [ "$package_name" == "" ]; then
    echo "Usage: ${FUNCNAME[0]} ENV_NAME WHEEL_NAME"
    echo "Example(s):"
    echo " ${FUNCNAME[0]} build_env fbgemm_gpu.whl # Install the package (wheel)"
    return 1
  fi

  echo "################################################################################"
  echo "# Install FBGEMM-GPU Package (Wheel)"
  echo "#"
  echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
  echo "################################################################################"
  echo ""

  echo "[BUILD] Installing FBGEMM-GPU wheel: ${package_name} ..."
  # Stop here if pip fails: otherwise the import checks below could pass
  # against a previously installed copy and hide the failed installation.
  if ! conda run -n "${env_name}" python -m pip install "${package_name}"; then
    echo "[BUILD] Failed to install FBGEMM-GPU wheel: ${package_name}"
    return 1
  fi

  echo "[BUILD] Checking imports ..."
  test_python_import "${env_name}" fbgemm_gpu || return 1
  test_python_import "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers || return 1

  echo "[BUILD] Wheel installation completed ..."
}
849+
850+
851+
################################################################################
852+
# FBGEMM_GPU Test Functions
853+
################################################################################
854+
855+
# Run every Python test file (*.py) in the current directory with pytest,
# skipping files that are listed as non-tests or known to be broken.
#
# Arguments: $1 - name of the conda environment
#            $2 - (optional) any non-empty value additionally skips the tests
#                 listed as broken in the CPU-only build
# Outputs:   a banner and per-file progress messages on stdout
# Returns:   0 if all executed test files pass; 1 on missing arguments, a
#            failed import check, or the first failing test file
run_fbgemm_gpu_tests () {
  # NOTE(review): env_name is assigned without `local`, as in the original;
  # other code sourcing this file may read it - confirm before localizing.
  env_name="$1"
  local cpu_only="$2"

  if [ "$env_name" == "" ]; then
    echo "Usage: ${FUNCNAME[0]} ENV_NAME [CPU_ONLY]"
    echo "Example(s):"
    echo " ${FUNCNAME[0]} build_env # Run all tests"
    echo " ${FUNCNAME[0]} build_env 1 # Skip tests known to be broken in CPU-only mode"
    return 1
  fi

  echo "################################################################################"
  echo "# Run FBGEMM-GPU Tests"
  echo "#"
  echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
  echo "################################################################################"
  echo ""

  # These are either non-tests or currently-broken tests in both FBGEMM_GPU and FBGEMM_GPU-CPU
  local -a files_to_skip=(
    split_table_batched_embeddings_test.py
    test_utils.py
    ssd_split_table_batched_embeddings_test.py
  )

  # These are tests that are currently broken in FBGEMM_GPU-CPU
  local -a unstable_tests=()
  if [ "$cpu_only" != "" ]; then
    unstable_tests=(
      jagged_tensor_ops_test.py
      uvm_test.py
    )
  fi

  echo "[TEST] Installing pytest ..."
  print_exec conda install -n "${env_name}" -y pytest

  echo "[BUILD] Checking imports ..."
  test_python_import "${env_name}" fbgemm_gpu || return 1

  # NOTE: These tests running on single CPU core with a less powerful testing
  # GPU in GHA can take up to 5 hours.
  local test_file
  for test_file in *.py; do
    # An unmatched glob is left as the literal "*.py"; do not pass it to pytest.
    [ -e "${test_file}" ] || continue

    # Compare whole file names as fixed strings (-F -x).  A plain
    # `grep "${test_file}"` is a regex substring match, so e.g. "ops_test.py"
    # would wrongly match "jagged_tensor_ops_test.py" and be skipped.
    if printf '%s\n' "${files_to_skip[@]}" | grep -Fxq -- "${test_file}"; then
      echo "[TEST] Skipping test file known to be broken: ${test_file}"
    elif printf '%s\n' "${unstable_tests[@]}" | grep -Fxq -- "${test_file}"; then
      echo "[TEST] Skipping test file: ${test_file}"
    elif run_python_test "${env_name}" "${test_file}"; then
      echo ""
    else
      # Stop at the first failing test file.
      return 1
    fi
  done
}
910+
911+
773912
################################################################################
774913
# Publish Functions
775914
################################################################################

.github/workflows/fbgemm_gpu_ci.yml

Lines changed: 35 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -14,81 +14,6 @@ on:
1414
- main
1515

1616
jobs:
17-
build_nvidia_gpu:
18-
if: ${{ false }} # Disable the job for now
19-
runs-on: ${{ matrix.os }}
20-
defaults:
21-
run:
22-
shell: bash
23-
env:
24-
PRELUDE: .github/scripts/setup_env.bash
25-
BUILD_ENV: build_binary
26-
strategy:
27-
# Don't fast-fail all the other builds if one of the them fails
28-
fail-fast: false
29-
matrix:
30-
os: [ ubuntu-20.04 ]
31-
python-version: [ "3.8" ]
32-
# As of version 2.0, PyTorch has dropped support for CUDA 11.6
33-
cuda-version: [ 11.7.1 ]
34-
35-
steps:
36-
- name: Checkout the Repository
37-
uses: actions/checkout@v3
38-
with:
39-
submodules: true
40-
41-
- name: Display System Info
42-
run: . $PRELUDE; print_system_info
43-
44-
- name: Setup Miniconda
45-
run: |
46-
. $PRELUDE; setup_miniconda $HOME/miniconda
47-
echo "${HOME}/miniconda/bin" >> $GITHUB_PATH
48-
echo "CONDA=${HOME}/miniconda" >> $GITHUB_PATH
49-
50-
- name: Create Conda Environment
51-
run: |
52-
. $PRELUDE
53-
create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
54-
55-
# This hack is needed to get builds running properly on Ubuntu 20.04
56-
echo "[SETUP] Creating symlink \$CONDA_PREFIX/lib64 -> \$CONDA_PREFIX/lib ..."
57-
conda_prefix=$(conda run -n "${env_name}" printenv CONDA_PREFIX)
58-
ln -s "${conda_prefix}/lib" "${conda_prefix}/lib64"
59-
60-
# - name: Install C/C++ Compilers
61-
# run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
62-
63-
- name: Install Build Tools
64-
run: . $PRELUDE; install_build_tools $BUILD_ENV
65-
66-
- name: Install CUDA
67-
run: . $PRELUDE; install_cuda $BUILD_ENV "${{ matrix.cuda-version }}"
68-
69-
- name: Install PyTorch
70-
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda "${{ matrix.cuda-version }}"
71-
72-
- name: Install cuDNN
73-
run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" "${{ matrix.cuda-version }}"
74-
75-
- name: Prepare FBGEMM Build
76-
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
77-
78-
- name: Build and Install FBGEMM_GPU
79-
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV
80-
81-
- name: Test FBGEMM_GPU installation
82-
shell: bash
83-
run: |
84-
. $PRELUDE;
85-
cd fbgemm_gpu/test
86-
print_exec conda run -n $BUILD_ENV python input_combine_test.py -v
87-
print_exec conda run -n $BUILD_ENV python quantize_ops_test.py -v
88-
print_exec conda run -n $BUILD_ENV python sparse_ops_test.py -v
89-
conda run -n $BUILD_ENV python -c "import fbgemm_gpu"
90-
conda run -n $BUILD_ENV python -c "import fbgemm_gpu.split_embedding_codegen_lookup_invokers"
91-
9217
build_amd_gpu:
9318
if: ${{ false }} # Disable the job for now
9419
runs-on: ${{ matrix.os }}
@@ -200,71 +125,48 @@ jobs:
200125
"
201126
docker run $DOCKER_OPTIONS $DOCKER_IMAGE $JENKINS_REPO_DIR_DOCKER/.jenkins/rocm/build_and_test.sh $JENKINS_REPO_DIR_DOCKER
202127
203-
test_nvidia_gpu:
204-
if: ${{ false }} # Disable the job for now
205-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
206-
with:
207-
job-name: cuda 11.7, A10
208-
runner: linux.g5.4xlarge.nvidia.gpu # A10
209-
repository: pytorch/fbgemm
210-
gpu-arch-type: cuda
211-
gpu-arch-version: 11.7
212-
timeout: 150
213-
script: |
214-
set -x
215-
# Checkout FBGEMM_GPU
216-
git submodule update --init
217-
218-
# Build FBGEMM_GPU with pytorch-nightly
219-
CUDA_VERSION="11.7.1"
220-
PYTHON_VERSION="3.10"
221-
bash .github/scripts/build_wheel.bash -v -p "$PYTHON_VERSION" -o fbgemm_gpu_test -P pytorch-nightly -c "$CUDA_VERSION" -m /opt/conda
222-
223-
# Test FBGEMM_GPU using a generated wheel file
224-
WHEEL_PATH="$(ls fbgemm_gpu/dist/*.whl)"
225-
bash .github/scripts/test_wheel.bash -v -p "$PYTHON_VERSION" -P pytorch-nightly -c "$CUDA_VERSION" -w "$WHEEL_PATH" -m /opt/conda
226-
227-
build_cpu_only:
128+
build_and_test_cpu:
228129
runs-on: ${{ matrix.os }}
130+
defaults:
131+
run:
132+
shell: bash
133+
env:
134+
PRELUDE: .github/scripts/setup_env.bash
135+
BUILD_ENV: build_binary
229136
strategy:
230137
matrix:
231-
os: [ubuntu-latest]
138+
os: [ ubuntu-20.04, ubuntu-latest ]
139+
python-version: [ "3.8", "3.9", "3.10" ]
232140

233141
steps:
234-
- uses: actions/checkout@v3
142+
- name: Checkout the Repository
143+
uses: actions/checkout@v3
144+
with:
145+
submodules: true
235146

236-
- name: Install dependencies
237-
shell: bash
238-
run: |
239-
sudo apt-get update
240-
sudo apt-get -y install git pip python3-dev
241-
sudo pip install cmake scikit-build ninja jinja2 numpy hypothesis --no-input
242-
# Install PyTorch (nightly) as required by fbgemm_gpu
243-
sudo pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
147+
- name: Display System Info
148+
run: . $PRELUDE; print_system_info
244149

245-
- name: Checkout submodules
246-
shell: bash
150+
- name: Setup Miniconda
247151
run: |
248-
cd fbgemm_gpu
249-
git submodule sync
250-
git submodule update --init --recursive
152+
# Load the build scripts and install Miniconda under $HOME/miniconda.
. "$PRELUDE"; setup_miniconda "$HOME/miniconda"
# Make the Miniconda binaries available on PATH in subsequent steps.
echo "${HOME}/miniconda/bin" >> "$GITHUB_PATH"
# CONDA is an environment variable, not a PATH entry, so it must be appended
# to GITHUB_ENV; writing "CONDA=..." to GITHUB_PATH only adds a bogus PATH entry.
echo "CONDA=${HOME}/miniconda" >> "$GITHUB_ENV"
251155
252-
- name: Build fbgemm_gpu
253-
shell: bash
254-
run: |
255-
cd fbgemm_gpu
256-
# to avoid "Permission denied" error in '/usr/local/lib/python3.8/dist-packages/' folder
257-
sudo python setup.py install --cpu_only
156+
- name: Create Conda Environment
157+
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
258158

259-
- name: Test fbgemm_gpu cpu-only installation
260-
shell: bash
261-
run: |
262-
cd fbgemm_gpu
263-
cd test
264-
python batched_unary_embeddings_test.py -v
265-
python input_combine_test.py -v
266-
python layout_transform_ops_test.py -v
267-
python merge_pooled_embeddings_test.py -v
268-
python permute_pooled_embedding_modules_test.py -v
269-
python quantize_ops_test.py -v
270-
python sparse_ops_test.py -v
159+
- name: Install Build Tools
160+
run: . $PRELUDE; install_build_tools $BUILD_ENV
161+
162+
- name: Install PyTorch
163+
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu
164+
165+
- name: Prepare FBGEMM Build
166+
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
167+
168+
- name: Build and Install FBGEMM_GPU (CPU version)
169+
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpuonly
170+
171+
- name: Test with PyTest
172+
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpuonly

0 commit comments

Comments
 (0)