
Commit a79e1ce

tinglvv and malfet authored
[aarch64] Add CUDA 12.4 build script for ARM wheel (#1775)
Add cuda_aarch64 ARM wheel build script with CUDA 12.4. Reference #1302.

Co-authored-by: Nikita Shulga <[email protected]>
1 parent 4e10974 commit a79e1ce
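
How the pieces fit together: the new workflow job exports GPU_ARCH_TYPE=cuda-aarch64 and GPU_ARCH_VERSION, manywheel/build_docker.sh builds the image (presumably from the new manywheel/Dockerfile_cuda_aarch64, which would in turn run common/install_cuda_aarch64.sh; that Dockerfile is not shown in this excerpt), and inside that image aarch64_ci_build.sh sees GPU_ARCH_VERSION and passes --enable-cuda to aarch64_wheel_ci_build.py, which bundles the CUDA libraries into the finished wheel. A minimal sketch of driving the same entry points by hand, assuming the scripts consume the same variables they do in CI:

    # Build the CUDA aarch64 manylinux image the way the new CI job does
    export GPU_ARCH_TYPE=cuda-aarch64
    export GPU_ARCH_VERSION=12.4
    manywheel/build_docker.sh

    # Inside the resulting container, the wheel build then takes the CUDA branch
    bash aarch64_linux/aarch64_ci_build.sh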

6 files changed: 348 additions & 42 deletions

.github/workflows/build-manywheel-images.yml

Lines changed: 21 additions & 0 deletions
@@ -13,6 +13,7 @@ on:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - manywheel/build_docker.sh
       - 'common/*'
@@ -21,6 +22,7 @@ on:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - 'common/*'
       - manywheel/build_docker.sh
@@ -54,6 +56,25 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  build-docker-cuda-aarch64:
+    runs-on: linux.arm64.2xlarge
+    strategy:
+      matrix:
+        cuda_version: ["12.4"]
+    env:
+      GPU_ARCH_TYPE: cuda-aarch64
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
   build-docker-rocm:
     runs-on: linux.12xlarge
     strategy:

aarch64_linux/aarch64_ci_build.sh

Lines changed: 7 additions & 1 deletion
@@ -26,4 +26,10 @@ cd /
 git config --global --add safe.directory /pytorch
 pip install -r /pytorch/requirements.txt
 pip install auditwheel
-python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+if [ -n "$GPU_ARCH_VERSION" ]; then
+    echo "BASE_CUDA_VERSION is set to: $GPU_ARCH_VERSION"
+    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+else
+    echo "BASE_CUDA_VERSION is not set."
+    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+fi
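
The wrapper now dispatches solely on whether GPU_ARCH_VERSION is non-empty (note the echo labels it BASE_CUDA_VERSION, a naming mismatch worth noting). A quick sketch of exercising both branches by hand, assuming the container layout the CI uses:

    # CUDA wheel: any non-empty GPU_ARCH_VERSION adds --enable-cuda
    GPU_ARCH_VERSION=12.4 bash aarch64_linux/aarch64_ci_build.sh

    # CPU-only wheel: with the variable unset, the original mkldnn-only path runs
    unset GPU_ARCH_VERSION
    bash aarch64_linux/aarch64_ci_build.sh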

aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 139 additions & 41 deletions
@@ -9,103 +9,201 @@
 
 
 def list_dir(path: str) -> List[str]:
-    ''''
+    """'
     Helper for getting paths for Python
-    '''
+    """
     return check_output(["ls", "-1", path]).decode().split("\n")
 
 
 def build_ArmComputeLibrary() -> None:
-    '''
+    """
     Using ArmComputeLibrary for aarch64 PyTorch
-    '''
-    print('Building Arm Compute Library')
-    acl_build_flags=["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
-                     "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]
-    acl_install_dir="/acl"
-    acl_checkout_dir="ComputeLibrary"
+    """
+    print("Building Arm Compute Library")
+    acl_build_flags = [
+        "debug=0",
+        "neon=1",
+        "opencl=0",
+        "os=linux",
+        "openmp=1",
+        "cppthreads=0",
+        "arch=armv8a",
+        "multi_isa=1",
+        "fixed_format_kernels=1",
+        "build=native",
+    ]
+    acl_install_dir = "/acl"
+    acl_checkout_dir = "ComputeLibrary"
     os.makedirs(acl_install_dir)
-    check_call(["git", "clone", "https://github.com/ARM-software/ComputeLibrary.git", "-b", "v23.08",
-                "--depth", "1", "--shallow-submodules"])
-    check_call(["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + acl_build_flags,
-               cwd=acl_checkout_dir)
+    check_call(
+        [
+            "git",
+            "clone",
+            "https://github.com/ARM-software/ComputeLibrary.git",
+            "-b",
+            "v23.08",
+            "--depth",
+            "1",
+            "--shallow-submodules",
+        ]
+    )
+    check_call(
+        ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"]
+        + acl_build_flags,
+        cwd=acl_checkout_dir,
+    )
     for d in ["arm_compute", "include", "utils", "support", "src"]:
         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
 
 
+def update_wheel(wheel_path) -> None:
+    """
+    Update the cuda wheel libraries
+    """
+    folder = os.path.dirname(wheel_path)
+    wheelname = os.path.basename(wheel_path)
+    os.mkdir(f"{folder}/tmp")
+    os.system(f"unzip {wheel_path} -d {folder}/tmp")
+    libs_to_copy = [
+        "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
+        "/usr/local/cuda/lib64/libcudnn.so.8",
+        "/usr/local/cuda/lib64/libcublas.so.12",
+        "/usr/local/cuda/lib64/libcublasLt.so.12",
+        "/usr/local/cuda/lib64/libcudart.so.12",
+        "/usr/local/cuda/lib64/libcufft.so.11",
+        "/usr/local/cuda/lib64/libcusparse.so.12",
+        "/usr/local/cuda/lib64/libcusparseLt.so.0",
+        "/usr/local/cuda/lib64/libcusolver.so.11",
+        "/usr/local/cuda/lib64/libcurand.so.10",
+        "/usr/local/cuda/lib64/libnvToolsExt.so.1",
+        "/usr/local/cuda/lib64/libnvJitLink.so.12",
+        "/usr/local/cuda/lib64/libnvrtc.so.12",
+        "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
+        "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_adv_train.so.8",
+        "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
+        "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
+        "/opt/conda/envs/aarch64_env/lib/libopenblas.so.0",
+        "/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
+        "/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
+        "/acl/build/libarm_compute.so",
+        "/acl/build/libarm_compute_graph.so",
+        "/acl/build/libarm_compute_core.so",
+    ]
+    # Copy libraries to unzipped_folder/a/lib
+    for lib_path in libs_to_copy:
+        lib_name = os.path.basename(lib_path)
+        shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
+    os.system(
+        f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so"
+    )
+    os.mkdir(f"{folder}/cuda_wheel")
+    os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
+    shutil.move(
+        f"{folder}/cuda_wheel/{wheelname}",
+        f"/dist/{wheelname}",
+        copy_function=shutil.copy2,
+    )
+    os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")
+
+
 def complete_wheel(folder: str) -> str:
-    '''
+    """
     Complete wheel build and put in artifact location
-    '''
+    """
     wheel_name = list_dir(f"/{folder}/dist")[0]
 
-    if "pytorch" in folder:
+    if "pytorch" in folder and not enable_cuda:
         print("Repairing Wheel with AuditWheel")
-        check_call(["auditwheel","repair", f"dist/{wheel_name}"], cwd=folder)
+        check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
         repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0]
 
         print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist")
-        os.rename(f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}")
+        os.rename(
+            f"/{folder}/wheelhouse/{repaired_wheel_name}",
+            f"/{folder}/dist/{repaired_wheel_name}",
+        )
     else:
         repaired_wheel_name = wheel_name
 
-    print(f"Copying {repaired_wheel_name} to artfacts")
-    shutil.copy2(f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}")
+    print(f"Copying {repaired_wheel_name} to artifacts")
+    shutil.copy2(
+        f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}"
+    )
 
     return repaired_wheel_name
 
 
 def parse_arguments():
-    '''
+    """
     Parse inline arguments
-    '''
+    """
     from argparse import ArgumentParser
+
     parser = ArgumentParser("AARCH64 wheels python CD")
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--build-only", action="store_true")
     parser.add_argument("--test-only", type=str)
     parser.add_argument("--enable-mkldnn", action="store_true")
+    parser.add_argument("--enable-cuda", action="store_true")
     return parser.parse_args()
 
 
-if __name__ == '__main__':
-    '''
+if __name__ == "__main__":
+    """
     Entry Point
-    '''
+    """
     args = parse_arguments()
     enable_mkldnn = args.enable_mkldnn
-    repo = Repository('/pytorch')
+    enable_cuda = args.enable_cuda
+    repo = Repository("/pytorch")
     branch = repo.head.name
-    if branch == 'HEAD':
-        branch = 'master'
-
+    if branch == "HEAD":
+        branch = "master"
 
-    print('Building PyTorch wheel')
+    print("Building PyTorch wheel")
     build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
     if override_package_version is not None:
         version = override_package_version
-        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
-    elif branch in ['nightly', 'master']:
-        build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
-        version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
+        build_vars += (
+            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
+        )
+    elif branch in ["nightly", "master"]:
+        build_date = (
+            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
+            .decode()
+            .replace("-", "")
+        )
+        version = (
+            check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
+        )
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
     elif branch.startswith(("v1.", "v2.")):
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
 
     if enable_mkldnn:
         build_ArmComputeLibrary()
         print("build pytorch with mkldnn+acl backend")
-        build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \
-                      "ACL_ROOT_DIR=/acl " \
-                      "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \
-                      "ACL_INCLUDE_DIR=/acl/build " \
-                      "ACL_LIBRARY=/acl/build "
+        build_vars += (
+            "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
+            "ACL_ROOT_DIR=/acl "
+            "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
+            "ACL_INCLUDE_DIR=/acl/build "
+            "ACL_LIBRARY=/acl/build "
+        )
     else:
         print("build pytorch without mkldnn backend")
 
     os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
-    pytorch_wheel_name = complete_wheel("pytorch")
-    print(f"Build Compelete. Created {pytorch_wheel_name}..")
+    if enable_cuda:
+        print("Updating Cuda Dependency")
+        filename = os.listdir("/pytorch/dist/")
+        wheel_path = f"/pytorch/dist/{filename[0]}"
+        update_wheel(wheel_path)
+    pytorch_wheel_name = complete_wheel("/pytorch/")
+    print(f"Build Complete. Created {pytorch_wheel_name}..")

common/install_cuda_aarch64.sh

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+set -ex
+
+function install_cusparselt_052 {
+    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
+    mkdir tmp_cusparselt && pushd tmp_cusparselt
+    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
+    tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
+    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
+    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
+    popd
+    rm -rf tmp_cusparselt
+}
+
+function install_124 {
+    echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    rm -rf /usr/local/cuda-12.4 /usr/local/cuda
+    # install CUDA 12.4.0 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
+    chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run
+    ./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent
+    rm -f cuda_12.4.0_550.54.14_linux_sbsa.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
+
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
+    tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
+    cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
+    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    cd nccl && make -j src.build
+    cp -a build/include/* /usr/local/cuda/include/
+    cp -a build/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf nccl
+
+    install_cusparselt_052
+
+    ldconfig
+}
+
+function prune_124 {
+    echo "Pruning CUDA 12.4"
+    #####################################################################################
+    # CUDA 12.4 prune static libs
+    #####################################################################################
+    export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
+
+    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+
+    if [[ -n "$OVERRIDE_GENCODE" ]]; then
+        export GENCODE=$OVERRIDE_GENCODE
+    fi
+
+    # all CUDA libs except CuDNN and CuBLAS
+    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
+        | xargs -I {} bash -c \
+            "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+
+    # prune CuDNN and CuBLAS
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+
+    #####################################################################################
+    # CUDA 12.1 prune visual tools
+    #####################################################################################
+    export CUDA_BASE="/usr/local/cuda-12.4/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
+}
+
+# idiomatic parameter and option handling in sh
+while test $# -gt 0
+do
+    case "$1" in
+    12.4) install_124; prune_124
+        ;;
+    *) echo "bad argument $1"; exit 1
+        ;;
+    esac
+    shift
+done
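
The installer takes the CUDA version as its only positional argument and fails fast on anything else; prune_124 additionally honors an OVERRIDE_GENCODE environment variable to narrow which SASS targets survive pruning. Typical usage as wired up by this commit:

    # Install CUDA 12.4 + cuDNN + NCCL + cuSparseLt for linux-sbsa, then prune
    bash common/install_cuda_aarch64.sh 12.4

    # Optionally retain only one architecture in the pruned static libs
    OVERRIDE_GENCODE="-gencode arch=compute_90,code=sm_90" bash common/install_cuda_aarch64.sh 12.4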
