Skip to content

Commit 465d98b

Browse files
authored
Set MAX_JOBS=5 to avoid OOM in the CUDA aarch64 (ARM) wheel build (#1828)
1 parent ec92cce commit 465d98b

File tree

3 files changed

+16
-13
lines changed

3 files changed

+16
-13
lines changed

aarch64_linux/aarch64_ci_build.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ cd /
2828
git config --global --add safe.directory /pytorch
2929
pip install -r /pytorch/requirements.txt
3030
pip install auditwheel
31-
if [ -n "$GPU_ARCH_VERSION" ]; then
32-
echo "BASE_CUDA_VERSION is set to: $GPU_ARCH_VERSION"
33-
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
34-
else
35-
echo "BASE_CUDA_VERSION is not set."
31+
if [ "$DESIRED_CUDA" = "cpu" ]; then
32+
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
3633
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
34+
else
35+
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
36+
python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
3737
fi

aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,10 @@ def update_wheel(wheel_path) -> None:
122122
"/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
123123
"/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
124124
"/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
125-
"/opt/conda/envs/aarch64_env/lib/libopenblas.so.0",
126-
"/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
127125
"/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
126+
"/opt/OpenBLAS/lib/libopenblas.so.0",
128127
"/acl/build/libarm_compute.so",
129128
"/acl/build/libarm_compute_graph.so",
130-
"/acl/build/libarm_compute_core.so",
131129
]
132130
# Copy libraries to unzipped_folder/a/lib
133131
for lib_path in libs_to_copy:
@@ -140,10 +138,10 @@ def update_wheel(wheel_path) -> None:
140138
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
141139
shutil.move(
142140
f"{folder}/cuda_wheel/{wheelname}",
143-
f"/dist/{wheelname}",
141+
f"{folder}/{wheelname}",
144142
copy_function=shutil.copy2,
145143
)
146-
os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")
144+
os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/")
147145

148146

149147
def complete_wheel(folder: str) -> str:
@@ -201,8 +199,9 @@ def parse_arguments():
201199
branch = "master"
202200

203201
print("Building PyTorch wheel")
204-
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
205-
os.system("python setup.py clean")
202+
# Enable linker script optimization: https://github.com/pytorch/pytorch/pull/121975/files
# The variable must be set in THIS process's environment so that child
# processes started afterwards (e.g. through os.system) inherit it.
# `os.system("export ...")` only changes the environment of the short-lived
# subshell it spawns, so it had no effect on any later command.
os.environ["USE_PRIORITIZED_TEXT_FOR_LD"] = "1"
203+
build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
204+
os.system("cd /pytorch; python setup.py clean")
206205

207206
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
208207
if override_package_version is not None:

manywheel/build_cuda.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,11 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
6060
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
6161
case ${CUDA_VERSION} in
6262
12.4)
63-
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
63+
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
64+
TORCH_CUDA_ARCH_LIST="9.0"
65+
else
66+
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
67+
fi
6468
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
6569
;;
6670
12.1)

0 commit comments

Comments
 (0)