Skip to content

Commit 3f9562d

Browse files
One-step ROCm manywheel/libtorch docker build (#1418)
* Use rocm/dev-centos-7:<rocm_version>-complete image with full ROCm install
* Remove ROCm install step and reinstate magma and MIOpen build steps
* Install full package for MIOpen, including headers and db files
  Retained some of the disk-cleanup-related code from cb0912c
* Use rocm/dev-ubuntu-20.04:<rocm_version>-complete image with full ROCm install
* Remove ROCm install and reinstate magma build from source
* Use --offload-arch instead of --amdgpu-target to silence warnings
* Use beefier runner instance for ROCm docker builds
* Typo
* Simplify ROCm targets
1 parent 52541e8 commit 3f9562d

File tree

8 files changed

+21
-45
lines changed

8 files changed

+21
-45
lines changed

.github/workflows/build-libtorch-images.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
run: |
4545
libtorch/build_docker.sh
4646
build-docker-rocm:
47-
runs-on: ubuntu-22.04
47+
runs-on: linux.12xlarge
4848
strategy:
4949
matrix:
5050
rocm_version: ["5.4.2", "5.5"]

.github/workflows/build-manywheel-images.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
run: |
4747
manywheel/build_docker.sh
4848
build-docker-rocm:
49-
runs-on: ubuntu-22.04
49+
runs-on: linux.12xlarge
5050
strategy:
5151
matrix:
5252
rocm_version: ["5.4.2", "5.5"]

common/install_miopen.sh

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,7 @@ if [[ $ROCM_INT -lt 40001 ]]; then
3333
exit 0
3434
fi
3535

36-
# CHANGED: Do not uninstall. To avoid out of disk space issues, we will copy lib over existing.
37-
# Uninstall existing package, to avoid errors during later yum install indicating packages did not change.
38-
#yum remove -y miopen-hip
36+
yum remove -y miopen-hip
3937

4038
# Function to retry functions that sometimes timeout or have flaky failures
4139
retry () {
@@ -77,24 +75,14 @@ elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
7775
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
7876
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
7977
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
80-
elif [[ $ROCM_INT -ge 40500 ]] && [[ $ROCM_INT -lt 50000 ]]; then
81-
MIOPEN_CMAKE_COMMON_FLAGS="${MIOPEN_CMAKE_COMMON_FLAGS} -DMIOPEN_USE_HIP_KERNELS=Off -DMIOPEN_DEFAULT_FIND_MODE=Normal"
82-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
83-
MIOPEN_BRANCH="release/rocm-rel-4.5-staging"
84-
elif [[ $ROCM_INT -ge 40300 ]] && [[ $ROCM_INT -lt 40500 ]]; then
85-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878;gfx1030_36"
86-
MIOPEN_BRANCH="release/rocm-rel-4.3-staging"
87-
elif [[ $ROCM_INT -ge 40200 ]] && [[ $ROCM_INT -lt 40300 ]]; then
88-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx803_36;gfx803_64;gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878"
89-
MIOPEN_BRANCH="rocm-4.2.x-staging"
9078
else
9179
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
9280
exit 1
9381
fi
9482

9583
git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH}
9684
pushd MIOpen
97-
# remove .git to save disk space ince CI runner was running out
85+
# remove .git to save disk space since CI runner was running out
9886
rm -rf .git
9987
# Don't build MLIR to save docker build time
10088
# since we are disabling MLIR backend for MIOpen anyway
@@ -122,18 +110,13 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang
122110
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
123111
make MIOpen -j $(nproc)
124112

125-
# CHANGED: Do not build package.
126113
# Build MIOpen package
127-
#make -j $(nproc) package
114+
make -j $(nproc) package
128115

129116
# clean up since CI runner was running out of disk space
130117
rm -rf /usr/local/cget
131118

132-
# CHANGED: Do not install package, just copy lib over existing.
133-
#yum install -y miopen-*.rpm
134-
dest=$(ls ${ROCM_INSTALL_PATH}/lib/libMIOpen.so.1.0.*)
135-
rm -f ${dest}
136-
cp lib/libMIOpen.so.1.0 ${dest}
119+
yum install -y miopen-*.rpm
137120

138121
popd
139122
rm -rf MIOpen

common/install_rocm_magma.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ else
3232
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
3333
fi
3434
for arch in $amdgpu_targets; do
35-
echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
35+
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
3636
done
3737
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
3838
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc

libtorch/Dockerfile

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,24 +60,18 @@ FROM cpu as rocm
6060
ARG PYTORCH_ROCM_ARCH
6161
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
6262
ENV MKLROOT /opt/intel
63-
ADD ./common/install_rocm.sh install_rocm.sh
63+
# No need to install ROCm as base docker image should have full ROCm install
64+
#ADD ./common/install_rocm.sh install_rocm.sh
6465
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
65-
#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
66+
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
6667
# gfortran and python needed for building magma from source for ROCm
6768
RUN apt-get update -y && \
6869
apt-get install gfortran -y && \
6970
apt-get install python -y && \
7071
apt-get clean
7172

72-
FROM rocm as rocm5.4.2
73-
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
7473
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
75-
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
76-
77-
FROM rocm as rocm5.5
78-
RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh
79-
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
80-
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
74+
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
8175

8276
FROM ${BASE_TARGET} as final
8377
# Install LLVM

libtorch/build_docker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ case ${GPU_ARCH_TYPE} in
2525
DOCKER_GPU_BUILD_ARG=""
2626
;;
2727
rocm)
28-
BASE_TARGET=rocm${GPU_ARCH_VERSION}
28+
BASE_TARGET=rocm
2929
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
30-
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma
30+
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
3131
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
3232
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
3333
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then

manywheel/Dockerfile

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,14 @@ FROM cpu_final as rocm_final
157157
ARG ROCM_VERSION=3.7
158158
ARG PYTORCH_ROCM_ARCH
159159
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
160-
# Install ROCm
161-
ADD ./common/install_rocm.sh install_rocm.sh
162-
RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
160+
# No need to install ROCm as base docker image should have full ROCm install
161+
#ADD ./common/install_rocm.sh install_rocm.sh
162+
#RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
163163
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
164164
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
165165
# cmake3 is needed for the MIOpen build
166166
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
167-
### The following is now performed beforehand in a new GPU_IMAGE with magma and miopen preinstalled
168-
#ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
169-
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
170-
#ADD ./common/install_miopen.sh install_miopen.sh
171-
#RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
167+
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
168+
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
169+
ADD ./common/install_miopen.sh install_miopen.sh
170+
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh

manywheel/build_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ case ${GPU_ARCH_TYPE} in
4444
TARGET=rocm_final
4545
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
4646
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION}
47-
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging
47+
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
4848
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
4949
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
5050
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then

0 commit comments

Comments
 (0)