Skip to content

Commit 3237101

Browse files
Upgrade nightly wheels to rocm5.5 (#1407)
* Add MIOpen db files to wheel
* Update magma commits for various branches to include header path updates
* Add ROCm5.5 support with Navi31-tuned MIOpen branch
* Upgrade nightly wheels to rocm5.5
* Update build_docker.sh for gfx1100
* Update build_docker.sh for gfx1100

---------

Co-authored-by: Jithun Nair <[email protected]>
Co-authored-by: Jithun Nair <[email protected]>
1 parent e795fee commit 3237101

10 files changed

25 additions and 19 deletions (+25 / -19 lines changed)

.github/workflows/build-libtorch-images.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
runs-on: ubuntu-22.04
4848
strategy:
4949
matrix:
50-
rocm_version: ["5.3", "5.4.2"]
50+
rocm_version: ["5.4.2", "5.5"]
5151
env:
5252
GPU_ARCH_TYPE: rocm
5353
GPU_ARCH_VERSION: ${{ matrix.rocm_version }}

.github/workflows/build-manywheel-images.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
runs-on: ubuntu-22.04
5050
strategy:
5151
matrix:
52-
rocm_version: ["5.3", "5.4.2"]
52+
rocm_version: ["5.4.2", "5.5"]
5353
env:
5454
GPU_ARCH_TYPE: rocm
5555
GPU_ARCH_VERSION: ${{ matrix.rocm_version }}

common/install_miopen.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ MIOPEN_CMAKE_COMMON_FLAGS="
6060
-DMIOPEN_BUILD_DRIVER=OFF
6161
"
6262
# Pull the MIOpen repo and choose the branch and -DMIOPEN_EMBED_DB CMake flag based on the ROCm version
63-
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
63+
if [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
64+
MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
65+
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
6466
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
6567
MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
6668
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then

common/install_rocm_magma.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ MKLROOT=${MKLROOT:-/opt/intel}
1212
# "install" hipMAGMA into /opt/rocm/magma by copying after build
1313
git clone https://bitbucket.org/icl/magma.git
1414
pushd magma
15-
# fix for magma_queue memory leak issue
16-
git checkout c62d700d880c7283b33fb1d615d62fc9c7f7ca21
15+
if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then
16+
git checkout magma_ctrl_launch_bounds
17+
else
18+
git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f
19+
fi
1720
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
1821
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
1922
# TODO (1)

libtorch/Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,13 @@ RUN apt-get update -y && \
6969
apt-get install python -y && \
7070
apt-get clean
7171

72-
FROM rocm as rocm5.3
73-
RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh
72+
FROM rocm as rocm5.4.2
73+
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
7474
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
7575
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
7676

77-
FROM rocm as rocm5.4.2
78-
RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh
77+
FROM rocm as rocm5.5
78+
RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh
7979
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
8080
#RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
8181

libtorch/build_all_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8 11.7; do
88
GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh"
99
done
1010

11-
for rocm_version in 5.3 5.4.2; do
11+
for rocm_version in 5.4.2 5.5; do
1212
GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh"
1313
done

libtorch/build_docker.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,14 @@ case ${GPU_ARCH_TYPE} in
2828
BASE_TARGET=rocm${GPU_ARCH_VERSION}
2929
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
3030
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma
31-
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908"
31+
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
3232
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
3333
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
3434
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
3535
else
3636
echo "ERROR: rocm regex failed"
3737
exit 1
3838
fi
39-
if [[ $ROCM_VERSION_INT -ge 40300 ]]; then
40-
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030"
41-
fi
4239
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
4340
;;
4441
*)

manywheel/build_all_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ for cuda_version in 11.8 11.7; do
1414
MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh"
1515
done
1616

17-
for rocm_version in 5.3 5.4.2; do
17+
for rocm_version in 5.4.2 5.5; do
1818
GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
1919
MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
2020
done

manywheel/build_docker.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,14 @@ case ${GPU_ARCH_TYPE} in
4545
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
4646
LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION}
4747
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging
48-
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908"
48+
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
4949
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
5050
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
5151
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
5252
else
5353
echo "ERROR: rocm regex failed"
5454
exit 1
5555
fi
56-
if [[ $ROCM_VERSION_INT -ge 40300 ]]; then
57-
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030"
58-
fi
5956
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
6057
;;
6158
*)

manywheel/build_rocm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
146146
OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
147147
ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)
148148

149+
# MIOpen db files (entries under share/miopen/db matching the target architectures)
150+
MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db
151+
MIOPEN_SHARE_DST=share/miopen/db
152+
MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH))
153+
149154
# ROCm library files
150155
ROCM_SO_PATHS=()
151156
for lib in "${ROCM_SO_FILES[@]}"
@@ -174,11 +179,13 @@ DEPS_SONAME=(
174179

175180
DEPS_AUX_SRCLIST=(
176181
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}"
182+
"${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/}"
177183
"/opt/amdgpu/share/libdrm/amdgpu.ids"
178184
)
179185

180186
DEPS_AUX_DSTLIST=(
181187
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}"
188+
"${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/}"
182189
"share/libdrm/amdgpu.ids"
183190
)
184191

0 commit comments

Comments
 (0)