Skip to content

GH-117512: Allow 64-bit JIT operands on 32-bit platforms #117527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 34 additions & 28 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,12 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
// Fill all of stencil's holes in the memory pointed to by base, using the
// values in patches.
static void
patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
patch(unsigned char *base, const Stencil *stencil, uintptr_t patches[])
{
for (uint64_t i = 0; i < stencil->holes_size; i++) {
for (size_t i = 0; i < stencil->holes_size; i++) {
const Hole *hole = &stencil->holes[i];
unsigned char *location = base + hole->offset;
uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend;
uint64_t value = patches[hole->value] + (uintptr_t)hole->symbol + hole->addend;
uint8_t *loc8 = (uint8_t *)location;
uint32_t *loc32 = (uint32_t *)location;
uint64_t *loc64 = (uint64_t *)location;
Expand Down Expand Up @@ -228,7 +228,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
case HoleKind_X86_64_RELOC_SIGNED:
case HoleKind_X86_64_RELOC_BRANCH:
// 32-bit relative address.
value -= (uint64_t)location;
value -= (uintptr_t)location;
// Check that we're not out of range of 32 signed bits:
assert((int64_t)value >= -(1LL << 31));
assert((int64_t)value < (1LL << 31));
Expand All @@ -239,7 +239,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
case HoleKind_R_AARCH64_JUMP26:
// 28-bit relative branch.
assert(IS_AARCH64_BRANCH(*loc32));
value -= (uint64_t)location;
value -= (uintptr_t)location;
// Check that we're not out of range of 28 signed bits:
assert((int64_t)value >= -(1 << 27));
assert((int64_t)value < (1 << 27));
Expand Down Expand Up @@ -313,7 +313,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
i++;
continue;
}
relaxed = (uint64_t)value - (uint64_t)location;
relaxed = value - (uintptr_t)location;
if ((relaxed & 0x3) == 0 &&
(int64_t)relaxed >= -(1L << 19) &&
(int64_t)relaxed < (1L << 19))
Expand All @@ -328,7 +328,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
// Fall through...
case HoleKind_ARM64_RELOC_PAGE21:
// Number of pages between this page and the value's page:
value = (value >> 12) - ((uint64_t)location >> 12);
value = (value >> 12) - ((uintptr_t)location >> 12);
// Check that we're not out of range of 21 signed bits:
assert((int64_t)value >= -(1 << 20));
assert((int64_t)value < (1 << 20));
Expand Down Expand Up @@ -363,14 +363,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
}

static void
copy_and_patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
copy_and_patch(unsigned char *base, const Stencil *stencil, uintptr_t patches[])
{
memcpy(base, stencil->body, stencil->body_size);
patch(base, stencil, patches);
}

static void
emit(const StencilGroup *group, uint64_t patches[])
emit(const StencilGroup *group, uintptr_t patches[])
{
copy_and_patch((unsigned char *)patches[HoleValue_DATA], &group->data, patches);
copy_and_patch((unsigned char *)patches[HoleValue_CODE], &group->code, patches);
Expand All @@ -381,9 +381,9 @@ int
_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length)
{
// Loop once to find the total compiled size:
uint32_t instruction_starts[UOP_MAX_TRACE_LENGTH];
uint32_t code_size = 0;
uint32_t data_size = 0;
size_t instruction_starts[UOP_MAX_TRACE_LENGTH];
size_t code_size = 0;
size_t data_size = 0;
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
Expand All @@ -409,14 +409,20 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
// Think of patches as a dictionary mapping HoleValue to uint64_t:
uint64_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uint64_t)code;
patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size;
patches[HoleValue_DATA] = (uint64_t)data;
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
// Think of patches as a dictionary mapping HoleValue to uintptr_t:
uintptr_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uintptr_t)code;
patches[HoleValue_CONTINUE] = (uintptr_t)code + group->code.body_size;
patches[HoleValue_DATA] = (uintptr_t)data;
patches[HoleValue_EXECUTOR] = (uintptr_t)executor;
patches[HoleValue_OPARG] = instruction->oparg;
#if SIZEOF_VOID_P == 8
patches[HoleValue_OPERAND] = instruction->operand;
#else
assert(SIZEOF_VOID_P == 4);
patches[HoleValue_OPERAND_HI] = instruction->operand >> 32;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should patches become uintptr_t instead of uint64_t? (on all platforms).
Given many of the casts are of the form (uint64_t)some_pointer, changing them to (uintptr_t)some_pointer avoids unnecessary widening on 32 bit platforms.

patches[HoleValue_OPERAND_LO] = instruction->operand & UINT32_MAX;
#endif
switch (instruction->format) {
case UOP_FORMAT_TARGET:
patches[HoleValue_TARGET] = instruction->target;
Expand All @@ -425,34 +431,34 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
assert(instruction->exit_index < executor->exit_count);
patches[HoleValue_EXIT_INDEX] = instruction->exit_index;
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
patches[HoleValue_ERROR_TARGET] = (uintptr_t)memory + instruction_starts[instruction->error_target];
}
break;
case UOP_FORMAT_JUMP:
assert(instruction->jump_target < length);
patches[HoleValue_JUMP_TARGET] = (uint64_t)memory + instruction_starts[instruction->jump_target];
patches[HoleValue_JUMP_TARGET] = (uintptr_t)memory + instruction_starts[instruction->jump_target];
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
patches[HoleValue_ERROR_TARGET] = (uintptr_t)memory + instruction_starts[instruction->error_target];
}
break;
default:
assert(0);
Py_FatalError("Illegal instruction format");
}
patches[HoleValue_TOP] = (uint64_t)memory + instruction_starts[1];
patches[HoleValue_TOP] = (uintptr_t)memory + instruction_starts[1];
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;
data += group->data.body_size;
}
// Protect against accidental buffer overrun into data:
const StencilGroup *group = &stencil_groups[_FATAL_ERROR];
uint64_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uint64_t)code;
patches[HoleValue_CONTINUE] = (uint64_t)code;
patches[HoleValue_DATA] = (uint64_t)data;
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
patches[HoleValue_TOP] = (uint64_t)code;
uintptr_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uintptr_t)code;
patches[HoleValue_CONTINUE] = (uintptr_t)code;
patches[HoleValue_DATA] = (uintptr_t)data;
patches[HoleValue_EXECUTOR] = (uintptr_t)executor;
patches[HoleValue_TOP] = (uintptr_t)code;
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;
Expand Down
5 changes: 4 additions & 1 deletion Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ class HoleValue(enum.Enum):
GOT = enum.auto()
# The current uop's oparg (exposed as _JIT_OPARG):
OPARG = enum.auto()
# The current uop's operand (exposed as _JIT_OPERAND):
# The current uop's operand on 64-bit platforms (exposed as _JIT_OPERAND):
OPERAND = enum.auto()
# The current uop's operand on 32-bit platforms (exposed as _JIT_OPERAND_HI and _JIT_OPERAND_LO):
OPERAND_HI = enum.auto()
OPERAND_LO = enum.auto()
# The current uop's target (exposed as _JIT_TARGET):
TARGET = enum.auto()
# The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET):
Expand Down
4 changes: 2 additions & 2 deletions Tools/jit/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def _dump_header() -> typing.Iterator[str]:
yield "} HoleValue;"
yield ""
yield "typedef struct {"
yield " const uint64_t offset;"
yield " const size_t offset;"
yield " const HoleKind kind;"
yield " const HoleValue value;"
yield " const void *symbol;"
Expand Down Expand Up @@ -58,7 +58,7 @@ def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
yield ""
yield "#define GET_PATCHES() { \\"
for value in _stencils.HoleValue:
yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
yield f" [HoleValue_{value.name}] = (uintptr_t)0xBADBADBADBADBADB, \\"
yield "}"


Expand Down
7 changes: 7 additions & 0 deletions Tools/jit/template.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,14 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
int opcode = _JIT_OPCODE;
// Other stuff we need handy:
PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
#if SIZEOF_VOID_P == 8
PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
#else
assert(SIZEOF_VOID_P == 4);
PATCH_VALUE(uint32_t, _operand_hi, _JIT_OPERAND_HI)
PATCH_VALUE(uint32_t, _operand_lo, _JIT_OPERAND_LO)
uint64_t _operand = ((uint64_t)_operand_hi << 32) | _operand_lo;
#endif
PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
PATCH_VALUE(uint16_t, _exit_index, _JIT_EXIT_INDEX)

Expand Down