Skip to content

Commit ea0453e

Browse files
authored
GH-130887: Always remove trailing jumps in AArch64 JIT stencils (GH-131042)
1 parent 0a91456 commit ea0453e

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optimize the AArch64 code generation for the JIT. Patch by Diego Russo.

Tools/jit/_stencils.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,24 @@ def pad(self, alignment: int) -> None:
209209
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
210210
self.body.extend([0] * padding)
211211

212-
def remove_jump(self, *, alignment: int = 1) -> None:
212+
def add_nops(self, nop: bytes, alignment: int) -> None:
    """Append copies of ``nop`` to the body until its length is aligned.

    Nothing is appended when ``len(self.body)`` is already a multiple of
    ``alignment``.

    Args:
        nop: Byte sequence to pad with (one NOP instruction).
        alignment: Required multiple for ``len(self.body)``.

    Raises:
        ValueError: If the gap to the next multiple of ``alignment`` cannot
            be filled with a whole number of ``nop`` sequences. This
            includes an empty ``nop`` when padding is actually required.
    """
    offset = len(self.body)
    nop_size = len(nop)

    # Calculate the gap to the next multiple of alignment.
    gap = -offset % alignment
    if not gap:
        return
    # Check for an empty NOP first: ``gap % 0`` would otherwise raise
    # ZeroDivisionError rather than the ValueError documented above.
    if nop_size == 0 or gap % nop_size != 0:
        raise ValueError(
            f"Cannot add nops of size '{nop_size}' to a body with "
            f"offset '{offset}' to align with '{alignment}'"
        )
    self.body.extend(nop * (gap // nop_size))
228+
229+
def remove_jump(self) -> None:
213230
"""Remove a zero-length continuation jump, if it exists."""
214231
hole = max(self.holes, key=lambda hole: hole.offset)
215232
match hole:
@@ -244,7 +261,7 @@ def remove_jump(self, *, alignment: int = 1) -> None:
244261
jump = b"\x00\x00\x00\x14"
245262
case _:
246263
return
247-
if self.body[offset:] == jump and offset % alignment == 0:
264+
if self.body[offset:] == jump:
248265
self.body = self.body[:offset]
249266
self.holes.remove(hole)
250267

@@ -266,10 +283,7 @@ class StencilGroup:
266283
_trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
267284

268285
def process_relocations(
269-
self,
270-
known_symbols: dict[str, int],
271-
*,
272-
alignment: int = 1,
286+
self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
273287
) -> None:
274288
"""Fix up all GOT and internal relocations for this stencil group."""
275289
for hole in self.code.holes.copy():
@@ -289,8 +303,8 @@ def process_relocations(
289303
self._trampolines.add(ordinal)
290304
hole.addend = ordinal
291305
hole.symbol = None
292-
self.code.remove_jump(alignment=alignment)
293-
self.code.pad(alignment)
306+
self.code.remove_jump()
307+
self.code.add_nops(nop=nop, alignment=alignment)
294308
self.data.pad(8)
295309
for stencil in [self.code, self.data]:
296310
for hole in stencil.holes:

Tools/jit/_targets.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
4444
verbose: bool = False
4545
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
4646

47+
def _get_nop(self) -> bytes:
    """Return the NOP byte sequence used to pad code for ``self.triple``.

    Raises:
        ValueError: If no NOP is defined for the target triple.
    """
    triple = self.triple
    # Plain prefix tests replace the anchored regexes; no ``re`` needed.
    if triple.startswith("aarch64-"):
        # Four-byte AArch64 NOP (0xD503201F stored little-endian).
        return b"\x1f\x20\x03\xd5"
    if triple.startswith(("x86_64-", "i686")):
        # Single-byte x86 NOP.
        return b"\x90"
    raise ValueError(f"NOP not defined for {self.triple}")
55+
4756
def _compute_digest(self, out: pathlib.Path) -> str:
4857
hasher = hashlib.sha256()
4958
hasher.update(self.triple.encode())
@@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
172181
stencil_groups = {task.get_name(): task.result() for task in tasks}
173182
for stencil_group in stencil_groups.values():
174183
stencil_group.process_relocations(
175-
known_symbols=self.known_symbols, alignment=self.alignment
184+
known_symbols=self.known_symbols,
185+
alignment=self.alignment,
186+
nop=self._get_nop(),
176187
)
177188
return stencil_groups
178189

0 commit comments

Comments
 (0)