Skip to content

Commit 524a027

Browse files
authored
aarch64: patch mkl-dnn for xbyak crashes due to /sys not accessible (#1648)
There are platforms where /sys is not mounted; skip handling HW caps on such platforms. Cherry-pick of uxlfoundation/oneDNN#1773. This fixes issue pytorch/pytorch#115482.
1 parent c55c58b commit 524a027

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed

aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ def parse_arguments():
106106
else:
107107
print("build pytorch without mkldnn backend")
108108

109+
# patch mkldnn to fix aarch64 mac and aws lambda crash
110+
print("Applying mkl-dnn patch to fix crash due to /sys not accesible")
111+
os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/fix-xbyak-failure.patch")
112+
109113
os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
110114
pytorch_wheel_name = complete_wheel("pytorch")
111115
print(f"Build Compelete. Created {pytorch_wheel_name}..")

aarch64_linux/build_aarch64_wheel.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,8 @@ def start_build(host: RemoteHost, *,
556556
build_ArmComputeLibrary(host, git_clone_flags)
557557
print("build pytorch with mkldnn+acl backend")
558558
build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
559+
host.run_cmd("cd $HOME && git clone https://github.com/pytorch/builder.git")
560+
host.run_cmd("cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/fix-xbyak-failure.patch") # noqa: E501
559561
host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") # noqa: E501
560562
print('Repair the wheel')
561563
pytorch_wheel_name = host.list_dir("pytorch/dist")[0]

mkldnn_fix/fix-xbyak-failure.patch

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
cpu: aarch64: fix xbyak functions for /sys access failures
2+
3+
There are platforms with /sys not mounted. Skip handling HW caps
4+
for such platforms.
5+
6+
This fixes issue pytorch/pytorch#115482
7+
---
8+
.../xbyak_aarch64/src/util_impl_linux.h | 24 ++++++++++++++-----
9+
.../aarch64/xbyak_aarch64/src/util_impl_mac.h | 9 ++++---
10+
2 files changed, 24 insertions(+), 9 deletions(-)
11+
12+
diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
13+
index 2c7b28e58b..860a05700f 100644
14+
--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
15+
+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
16+
@@ -144,8 +144,13 @@ private:
17+
regex_t regexBuf;
18+
regmatch_t match[1];
19+
20+
- if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0)
21+
- throw ERR_INTERNAL;
22+
+ if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0) {
23+
+ /* There are platforms with /sys not mounted. return empty buffers
24+
+ * in these scenarios
25+
+ */
26+
+ buf[0] = '\0';
27+
+ return 0;
28+
+ }
29+
30+
const int retVal = regexec(&regexBuf, path, 1, match, 0);
31+
regfree(&regexBuf);
32+
@@ -187,8 +192,12 @@ private:
33+
regex_t regexBuf;
34+
regmatch_t match[2];
35+
36+
- if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0)
37+
- throw ERR_INTERNAL;
38+
+ if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0) {
39+
+ /* There are platforms with /sys not mounted. return gracefully
40+
+ * in these scenarios
41+
+ */
42+
+ goto init_and_return_false;
43+
+ }
44+
45+
if (regexec(&regexBuf, dp->d_name, 1, match, 0) == 0) { // Found index[1-9][0-9]. directory
46+
char *dir_name = buf0;
47+
@@ -438,12 +447,15 @@ private:
48+
49+
FILE *file = fopen(path_midr_el1, "r");
50+
if (file == nullptr) {
51+
- throw Error(ERR_INTERNAL);
52+
+ /* There are platforms with /sys not mounted. return empty buffer
53+
+ * in these scenarios
54+
+ */
55+
+ cacheInfo_.midr_el1 = 0xFE << 24;
56+
return;
57+
}
58+
59+
if (fread(buf, sizeof(char), 64, file) == 0) {
60+
- throw Error(ERR_INTERNAL);
61+
+ cacheInfo_.midr_el1 = 0xFE << 24;
62+
return;
63+
}
64+
65+
diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
66+
index ebd6dba7c0..93bdae1d7a 100644
67+
--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
68+
+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
69+
@@ -102,18 +102,21 @@ private:
70+
size_t val = 0;
71+
size_t len = sizeof(val);
72+
73+
+ /* There are platforms with /sys not mounted. skip
74+
+ * handling HW caps for such platforms.
75+
+ */
76+
if (sysctlbyname(hw_opt_atomics, &val, &len, NULL, 0) != 0)
77+
- throw Error(ERR_INTERNAL);
78+
+ type_ = 0;
79+
else
80+
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ATOMIC : 0;
81+
82+
if (sysctlbyname(hw_opt_fp, &val, &len, NULL, 0) != 0)
83+
- throw Error(ERR_INTERNAL);
84+
+ type_ = 0;
85+
else
86+
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_FP : 0;
87+
88+
if (sysctlbyname(hw_opt_neon, &val, &len, NULL, 0) != 0)
89+
- throw Error(ERR_INTERNAL);
90+
+ type_ = 0;
91+
else
92+
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ADVSIMD : 0;
93+
}
94+
--
95+
2.34.1
96+

0 commit comments

Comments
 (0)