diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index d95f9208c..95d96a1a0 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - rocm_version: ["5.3", "5.4.2"] + rocm_version: ["5.4.2", "5.5"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index b171edb80..4fa947118 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -49,7 +49,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - rocm_version: ["5.3", "5.4.2"] + rocm_version: ["5.4.2", "5.5"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/common/install_miopen.sh b/common/install_miopen.sh index a5166c097..69de4fe5b 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -60,7 +60,9 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then +if [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then + MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11" +elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" MIOPEN_BRANCH="release/rocm-rel-5.4-staging" elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index 00540fbec..73c0dbe55 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -12,8 +12,11 @@ MKLROOT=${MKLROOT:-/opt/intel} # "install" hipMAGMA into /opt/rocm/magma by copying after build git clone https://bitbucket.org/icl/magma.git pushd magma -# fix for magma_queue memory leak issue -git checkout c62d700d880c7283b33fb1d615d62fc9c7f7ca21 +if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then + git checkout magma_ctrl_launch_bounds +else + git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f +fi cp make.inc-examples/make.inc.hip-gcc-mkl make.inc echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc # TODO (1) diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index cfe1208bd..84dffe0d8 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -69,13 +69,13 @@ RUN apt-get update -y && \ apt-get install python -y && \ apt-get clean -FROM rocm as rocm5.3 -RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.4.2 +RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh #RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh -FROM rocm as rocm5.4.2 -RUN ROCM_VERSION=5.4.2 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.5 +RUN ROCM_VERSION=5.5 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh #RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index 3c8aff9cb..9b657be9b 100755 --- a/libtorch/build_all_docker.sh +++ b/libtorch/build_all_docker.sh @@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8 11.7; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.3 5.4.2; do +for rocm_version in 5.4.2 5.5; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index d5505471b..50d75c709 100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -28,7 +28,7 @@ case ${GPU_ARCH_TYPE} in BASE_TARGET=rocm${GPU_ARCH_VERSION} DOCKER_TAG=rocm${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-magma - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) @@ -36,9 +36,6 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi - if [[ $ROCM_VERSION_INT -ge 40300 ]]; then - PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030" - fi DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" ;; *) diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 7a695f51c..66b58a718 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -14,7 +14,7 @@ for cuda_version in 11.8 11.7; do MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.3 5.4.2; do +for rocm_version in 5.4.2 5.5; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index b80749083..55abdd1d9 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -45,7 +45,7 @@ case ${GPU_ARCH_TYPE} in DOCKER_TAG=rocm${GPU_ARCH_VERSION} LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-magma-miopen-staging - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) @@ -53,9 +53,6 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi - if [[ $ROCM_VERSION_INT -ge 40300 ]]; then - PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx90a;gfx1030" - fi DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9" ;; *) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 198237a99..b910e552f 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -146,6 +146,11 @@ ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH) OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx) ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES) +# MIOpen library files +MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db +MIOPEN_SHARE_DST=share/miopen/db +MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH)) + # ROCm library files ROCM_SO_PATHS=() for lib in "${ROCM_SO_FILES[@]}" @@ -174,11 +179,13 @@ DEPS_SONAME=( DEPS_AUX_SRCLIST=( "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}" + "${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/}" "/opt/amdgpu/share/libdrm/amdgpu.ids" ) DEPS_AUX_DSTLIST=( "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}" + "${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/}" "share/libdrm/amdgpu.ids" )