Skip to content

Commit 22988c3

Browse files
authored
gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)
- Hand-written uops _POP_JUMP_IF_{TRUE,FALSE}. These test the value on top of the stack and then pop it. The jump target (in superblock space) is absolute.
- Hand-written translation for POP_JUMP_IF_{TRUE,FALSE}, assuming the jump is unlikely. Once we implement jump-likelihood profiling, we can implement the jump-likely case (in another PR).
- Tests (including some test cleanup).
- Improvements to len(ex) and ex[i] to expose the whole trace.
1 parent 18dfbd0 commit 22988c3

File tree

5 files changed

+181
-72
lines changed

5 files changed

+181
-72
lines changed

Lib/test/test_capi/test_misc.py

+43-20
Original file line numberDiff line numberDiff line change
@@ -2347,11 +2347,12 @@ def func():
23472347

23482348
@contextlib.contextmanager
23492349
def temporary_optimizer(opt):
2350+
old_opt = _testinternalcapi.get_optimizer()
23502351
_testinternalcapi.set_optimizer(opt)
23512352
try:
23522353
yield
23532354
finally:
2354-
_testinternalcapi.set_optimizer(None)
2355+
_testinternalcapi.set_optimizer(old_opt)
23552356

23562357

23572358
@contextlib.contextmanager
@@ -2420,8 +2421,8 @@ def long_loop():
24202421
self.assertEqual(opt.get_count(), 10)
24212422

24222423

2423-
2424-
def get_first_executor(code):
2424+
def get_first_executor(func):
2425+
code = func.__code__
24252426
co_code = code.co_code
24262427
JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"]
24272428
for i in range(0, len(co_code), 2):
@@ -2446,13 +2447,7 @@ def testfunc(x):
24462447
with temporary_optimizer(opt):
24472448
testfunc(1000)
24482449

2449-
ex = None
2450-
for offset in range(0, len(testfunc.__code__.co_code), 2):
2451-
try:
2452-
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
2453-
break
2454-
except ValueError:
2455-
pass
2450+
ex = get_first_executor(testfunc)
24562451
self.assertIsNotNone(ex)
24572452
uops = {opname for opname, _ in ex}
24582453
self.assertIn("SAVE_IP", uops)
@@ -2493,11 +2488,13 @@ def many_vars():
24932488

24942489
opt = _testinternalcapi.get_uop_optimizer()
24952490
with temporary_optimizer(opt):
2496-
ex = get_first_executor(many_vars.__code__)
2491+
ex = get_first_executor(many_vars)
24972492
self.assertIsNone(ex)
24982493
many_vars()
2499-
ex = get_first_executor(many_vars.__code__)
2500-
self.assertIn(("LOAD_FAST", 259), list(ex))
2494+
2495+
ex = get_first_executor(many_vars)
2496+
self.assertIsNotNone(ex)
2497+
self.assertIn(("LOAD_FAST", 259), list(ex))
25012498

25022499
def test_unspecialized_unpack(self):
25032500
# An example of an unspecialized opcode
@@ -2516,17 +2513,43 @@ def testfunc(x):
25162513
with temporary_optimizer(opt):
25172514
testfunc(10)
25182515

2519-
ex = None
2520-
for offset in range(0, len(testfunc.__code__.co_code), 2):
2521-
try:
2522-
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
2523-
break
2524-
except ValueError:
2525-
pass
2516+
ex = get_first_executor(testfunc)
25262517
self.assertIsNotNone(ex)
25272518
uops = {opname for opname, _ in ex}
25282519
self.assertIn("UNPACK_SEQUENCE", uops)
25292520

2521+
def test_pop_jump_if_false(self):
2522+
def testfunc(n):
2523+
i = 0
2524+
while i < n:
2525+
i += 1
2526+
2527+
opt = _testinternalcapi.get_uop_optimizer()
2528+
2529+
with temporary_optimizer(opt):
2530+
testfunc(10)
2531+
2532+
ex = get_first_executor(testfunc)
2533+
self.assertIsNotNone(ex)
2534+
uops = {opname for opname, _ in ex}
2535+
self.assertIn("_POP_JUMP_IF_FALSE", uops)
2536+
2537+
def test_pop_jump_if_true(self):
2538+
def testfunc(n):
2539+
i = 0
2540+
while not i >= n:
2541+
i += 1
2542+
2543+
opt = _testinternalcapi.get_uop_optimizer()
2544+
2545+
with temporary_optimizer(opt):
2546+
testfunc(10)
2547+
2548+
ex = get_first_executor(testfunc)
2549+
self.assertIsNotNone(ex)
2550+
uops = {opname for opname, _ in ex}
2551+
self.assertIn("_POP_JUMP_IF_TRUE", uops)
2552+
25302553

25312554
if __name__ == "__main__":
25322555
unittest.main()

Python/ceval.c

+21-1
Original file line numberDiff line numberDiff line change
@@ -2751,7 +2751,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
27512751
operand = self->trace[pc].operand;
27522752
oparg = (int)operand;
27532753
DPRINTF(3,
2754-
" uop %s, operand %" PRIu64 ", stack_level %d\n",
2754+
"%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
2755+
pc,
27552756
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
27562757
operand,
27572758
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
@@ -2763,6 +2764,25 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
27632764
#define ENABLE_SPECIALIZATION 0
27642765
#include "executor_cases.c.h"
27652766

2767+
// NOTE: These pop-jumps move the uop pc, not the bytecode ip
2768+
case _POP_JUMP_IF_FALSE:
2769+
{
2770+
if (Py_IsFalse(stack_pointer[-1])) {
2771+
pc = oparg;
2772+
}
2773+
stack_pointer--;
2774+
break;
2775+
}
2776+
2777+
case _POP_JUMP_IF_TRUE:
2778+
{
2779+
if (Py_IsTrue(stack_pointer[-1])) {
2780+
pc = oparg;
2781+
}
2782+
stack_pointer--;
2783+
break;
2784+
}
2785+
27662786
case SAVE_IP:
27672787
{
27682788
frame->prev_instr = ip_offset + oparg;

Python/opcode_metadata.h

+28-24
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

+84-27
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {
307307

308308
static const char *
309309
uop_name(int index) {
310-
if (index < EXIT_TRACE) {
310+
if (index < 256) {
311311
return _PyOpcode_OpName[index];
312312
}
313313
return _PyOpcode_uop_name[index];
@@ -316,9 +316,9 @@ uop_name(int index) {
316316
static Py_ssize_t
317317
uop_len(_PyUOpExecutorObject *self)
318318
{
319-
int count = 1;
319+
int count = 0;
320320
for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) {
321-
if (self->trace[count-1].opcode == EXIT_TRACE) {
321+
if (self->trace[count].opcode == 0) {
322322
break;
323323
}
324324
}
@@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
328328
static PyObject *
329329
uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
330330
{
331-
for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) {
332-
if (self->trace[i].opcode == EXIT_TRACE) {
333-
break;
334-
}
335-
if (i != index) {
336-
continue;
337-
}
338-
const char *name = uop_name(self->trace[i].opcode);
339-
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
340-
if (oname == NULL) {
341-
return NULL;
342-
}
343-
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand);
344-
if (operand == NULL) {
345-
Py_DECREF(oname);
346-
return NULL;
347-
}
348-
PyObject *args[2] = { oname, operand };
349-
return _PyTuple_FromArraySteal(args, 2);
331+
Py_ssize_t len = uop_len(self);
332+
if (index < 0 || index >= len) {
333+
PyErr_SetNone(PyExc_IndexError);
334+
return NULL;
350335
}
351-
PyErr_SetNone(PyExc_IndexError);
352-
return NULL;
336+
const char *name = uop_name(self->trace[index].opcode);
337+
if (name == NULL) {
338+
name = "<nil>";
339+
}
340+
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
341+
if (oname == NULL) {
342+
return NULL;
343+
}
344+
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
345+
if (operand == NULL) {
346+
Py_DECREF(oname);
347+
return NULL;
348+
}
349+
PyObject *args[2] = { oname, operand };
350+
return _PyTuple_FromArraySteal(args, 2);
353351
}
354352

355353
PySequenceMethods uop_as_sequence = {
@@ -372,12 +370,13 @@ translate_bytecode_to_trace(
372370
PyCodeObject *code,
373371
_Py_CODEUNIT *instr,
374372
_PyUOpInstruction *trace,
375-
int max_length)
373+
int buffer_size)
376374
{
377375
#ifdef Py_DEBUG
378376
_Py_CODEUNIT *initial_instr = instr;
379377
#endif
380378
int trace_length = 0;
379+
int max_length = buffer_size;
381380

382381
#ifdef Py_DEBUG
383382
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -401,6 +400,14 @@ translate_bytecode_to_trace(
401400
trace[trace_length].operand = (OPERAND); \
402401
trace_length++;
403402

403+
#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
404+
DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
405+
(INDEX), \
406+
(OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
407+
(uint64_t)(OPERAND)); \
408+
trace[(INDEX)].opcode = (OPCODE); \
409+
trace[(INDEX)].operand = (OPERAND);
410+
404411
DPRINTF(4,
405412
"Optimizing %s (%s:%d) at byte offset %ld\n",
406413
PyUnicode_AsUTF8(code->co_qualname),
@@ -409,7 +416,7 @@ translate_bytecode_to_trace(
409416
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive));
410417

411418
for (;;) {
412-
ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
419+
ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive);
413420
int opcode = instr->op.code;
414421
int oparg = instr->op.arg;
415422
int extras = 0;
@@ -420,12 +427,35 @@ translate_bytecode_to_trace(
420427
oparg = (oparg << 8) | instr->op.arg;
421428
}
422429
if (opcode == ENTER_EXECUTOR) {
423-
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
430+
_PyExecutorObject *executor =
431+
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
424432
opcode = executor->vm_data.opcode;
425433
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
426434
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
427435
}
428436
switch (opcode) {
437+
438+
case POP_JUMP_IF_FALSE:
439+
case POP_JUMP_IF_TRUE:
440+
{
441+
// Assume jump unlikely (TODO: handle jump likely case)
442+
// Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
443+
if (trace_length + 5 > max_length) {
444+
DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
445+
goto done;
446+
}
447+
_Py_CODEUNIT *target_instr =
448+
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
449+
max_length -= 2; // Really the start of the stubs
450+
int uopcode = opcode == POP_JUMP_IF_TRUE ?
451+
_POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
452+
ADD_TO_TRACE(uopcode, max_length);
453+
ADD_TO_STUB(max_length, SAVE_IP,
454+
target_instr - (_Py_CODEUNIT *)code->co_code_adaptive);
455+
ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
456+
break;
457+
}
458+
429459
default:
430460
{
431461
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -503,6 +533,30 @@ translate_bytecode_to_trace(
503533
code->co_firstlineno,
504534
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive),
505535
trace_length);
536+
if (max_length < buffer_size && trace_length < max_length) {
537+
// Move the stubs back to be immediately after the main trace
538+
// (which ends at trace_length)
539+
DPRINTF(2,
540+
"Moving %d stub uops back by %d\n",
541+
buffer_size - max_length,
542+
max_length - trace_length);
543+
memmove(trace + trace_length,
544+
trace + max_length,
545+
(buffer_size - max_length) * sizeof(_PyUOpInstruction));
546+
// Patch up the jump targets
547+
for (int i = 0; i < trace_length; i++) {
548+
if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
549+
trace[i].opcode == _POP_JUMP_IF_TRUE)
550+
{
551+
int target = trace[i].operand;
552+
if (target >= max_length) {
553+
target += trace_length - max_length;
554+
trace[i].operand = target;
555+
}
556+
}
557+
}
558+
trace_length += buffer_size - max_length;
559+
}
506560
return trace_length;
507561
}
508562
else {
@@ -539,6 +593,9 @@ uop_optimize(
539593
}
540594
executor->base.execute = _PyUopExecute;
541595
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
596+
if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) {
597+
executor->trace[trace_length].opcode = 0; // Sentinel
598+
}
542599
*exec_ptr = (_PyExecutorObject *)executor;
543600
return 1;
544601
}

0 commit comments

Comments
 (0)