@@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {
307
307
308
308
static const char *
309
309
uop_name (int index ) {
310
- if (index < EXIT_TRACE ) {
310
+ if (index < 256 ) {
311
311
return _PyOpcode_OpName [index ];
312
312
}
313
313
return _PyOpcode_uop_name [index ];
@@ -316,9 +316,9 @@ uop_name(int index) {
316
316
static Py_ssize_t
317
317
uop_len (_PyUOpExecutorObject * self )
318
318
{
319
- int count = 1 ;
319
+ int count = 0 ;
320
320
for (; count < _Py_UOP_MAX_TRACE_LENGTH ; count ++ ) {
321
- if (self -> trace [count - 1 ].opcode == EXIT_TRACE ) {
321
+ if (self -> trace [count ].opcode == 0 ) {
322
322
break ;
323
323
}
324
324
}
@@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
328
328
// uop_item: build a 2-item array {name, operand} for the trace entry at
// `index` of executor `self` and hand it to _PyTuple_FromArraySteal.
// Presumably installed as the item accessor of uop_as_sequence, which is
// declared right after this function -- confirm against that table.
//
// How to read this span: it is a diff hunk, not plain C. Lines starting
// with `-` are the removed implementation, lines starting with `+` are the
// replacement, unmarked lines are unchanged context, and the bare numbers
// are old/new line-number residue from the diff rendering.
//
// Returns NULL with IndexError set (no message) when `index` is out of
// range, and NULL when creating either the name or the operand object fails.
static PyObject *
329
329
uop_item (_PyUOpExecutorObject * self , Py_ssize_t index )
330
330
{
331
// Removed version: walked the trace from entry 0, stopped at the first
// EXIT_TRACE entry, and only built a result on the iteration where i == index.
- for (int i = 0 ; i < _Py_UOP_MAX_TRACE_LENGTH ; i ++ ) {
332
- if (self -> trace [i ].opcode == EXIT_TRACE ) {
333
- break ;
334
- }
335
- if (i != index ) {
336
- continue ;
337
- }
338
- const char * name = uop_name (self -> trace [i ].opcode );
339
- PyObject * oname = _PyUnicode_FromASCII (name , strlen (name ));
340
- if (oname == NULL ) {
341
- return NULL ;
342
- }
343
- PyObject * operand = PyLong_FromUnsignedLongLong (self -> trace [i ].operand );
344
- if (operand == NULL ) {
345
- Py_DECREF (oname );
346
- return NULL ;
347
- }
348
- PyObject * args [2 ] = { oname , operand };
349
- return _PyTuple_FromArraySteal (args , 2 );
331
// Added version: bounds-check `index` against uop_len(self) up front, then
// index the trace directly instead of scanning for the entry.
+ Py_ssize_t len = uop_len (self );
332
+ if (index < 0 || index >= len ) {
333
+ PyErr_SetNone (PyExc_IndexError );
334
+ return NULL ;
350
335
}
351
// Removed version raised IndexError here, after the scan fell through.
- PyErr_SetNone (PyExc_IndexError );
352
- return NULL ;
336
+ const char * name = uop_name (self -> trace [index ].opcode );
337
// Substitute a placeholder so strlen() below is never handed a NULL pointer.
+ if (name == NULL ) {
338
+ name = "<nil>" ;
339
+ }
340
+ PyObject * oname = _PyUnicode_FromASCII (name , strlen (name ));
341
+ if (oname == NULL ) {
342
+ return NULL ;
343
+ }
344
+ PyObject * operand = PyLong_FromUnsignedLongLong (self -> trace [index ].operand );
345
+ if (operand == NULL ) {
346
// Release the name object created above before reporting the failure.
+ Py_DECREF (oname );
347
+ return NULL ;
348
+ }
349
// NOTE(review): going by its name, _PyTuple_FromArraySteal takes over both
// references in args -- confirm; nothing is released here after the call.
+ PyObject * args [2 ] = { oname , operand };
350
+ return _PyTuple_FromArraySteal (args , 2 );
353
351
}
354
352
355
353
PySequenceMethods uop_as_sequence = {
@@ -372,12 +370,13 @@ translate_bytecode_to_trace(
372
370
PyCodeObject * code ,
373
371
_Py_CODEUNIT * instr ,
374
372
_PyUOpInstruction * trace ,
375
- int max_length )
373
+ int buffer_size )
376
374
{
377
375
#ifdef Py_DEBUG
378
376
_Py_CODEUNIT * initial_instr = instr ;
379
377
#endif
380
378
int trace_length = 0 ;
379
+ int max_length = buffer_size ;
381
380
382
381
#ifdef Py_DEBUG
383
382
char * uop_debug = Py_GETENV ("PYTHONUOPSDEBUG" );
@@ -401,6 +400,14 @@ translate_bytecode_to_trace(
401
400
trace[trace_length].operand = (OPERAND); \
402
401
trace_length++;
403
402
403
// ADD_TO_STUB(INDEX, OPCODE, OPERAND): log the entry through DPRINTF at
// level 2, then store OPCODE and OPERAND into trace[INDEX].
// The name printed comes from _PyOpcode_OpName when OPCODE is below 256 and
// from _PyOpcode_uop_name otherwise.
// It writes at an explicit index and never touches trace_length, whereas the
// macro tail visible just above (presumably ADD_TO_TRACE) appends at
// trace[trace_length] and increments it.
// Usage caveats: INDEX and OPCODE each appear several times in the expansion,
// so pass side-effect-free expressions. The expansion is several statements
// with no do/while(0) wrapper, so do not use it as the body of an unbraced
// if/else.
// (This span is a diff hunk: `+` marks added lines and the bare numbers are
// line-number residue from the diff rendering.)
+ #define ADD_TO_STUB (INDEX , OPCODE , OPERAND ) \
404
+ DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
405
+ (INDEX), \
406
+ (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
407
+ (uint64_t)(OPERAND)); \
408
+ trace[(INDEX)].opcode = (OPCODE); \
409
+ trace[(INDEX)].operand = (OPERAND);
410
+
404
411
DPRINTF (4 ,
405
412
"Optimizing %s (%s:%d) at byte offset %ld\n" ,
406
413
PyUnicode_AsUTF8 (code -> co_qualname ),
@@ -409,7 +416,7 @@ translate_bytecode_to_trace(
409
416
2 * (long )(initial_instr - (_Py_CODEUNIT * )code -> co_code_adaptive ));
410
417
411
418
for (;;) {
412
- ADD_TO_TRACE (SAVE_IP , ( int )( instr - (_Py_CODEUNIT * )code -> co_code_adaptive ) );
419
+ ADD_TO_TRACE (SAVE_IP , instr - (_Py_CODEUNIT * )code -> co_code_adaptive );
413
420
int opcode = instr -> op .code ;
414
421
int oparg = instr -> op .arg ;
415
422
int extras = 0 ;
@@ -420,12 +427,35 @@ translate_bytecode_to_trace(
420
427
oparg = (oparg << 8 ) | instr -> op .arg ;
421
428
}
422
429
if (opcode == ENTER_EXECUTOR ) {
423
- _PyExecutorObject * executor = (_PyExecutorObject * )code -> co_executors -> executors [oparg & 255 ];
430
+ _PyExecutorObject * executor =
431
+ (_PyExecutorObject * )code -> co_executors -> executors [oparg & 255 ];
424
432
opcode = executor -> vm_data .opcode ;
425
433
DPRINTF (2 , " * ENTER_EXECUTOR -> %s\n" , _PyOpcode_OpName [opcode ]);
426
434
oparg = (oparg & 0xffffff00 ) | executor -> vm_data .oparg ;
427
435
}
428
436
switch (opcode ) {
437
+
438
+ case POP_JUMP_IF_FALSE :
439
+ case POP_JUMP_IF_TRUE :
440
+ {
441
+ // Assume jump unlikely (TODO: handle jump likely case)
442
+ // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
443
+ if (trace_length + 5 > max_length ) {
444
+ DPRINTF (1 , "Ran out of space for POP_JUMP_IF_FALSE\n" );
445
+ goto done ;
446
+ }
447
+ _Py_CODEUNIT * target_instr =
448
+ instr + 1 + _PyOpcode_Caches [_PyOpcode_Deopt [opcode ]] + oparg ;
449
+ max_length -= 2 ; // Really the start of the stubs
450
+ int uopcode = opcode == POP_JUMP_IF_TRUE ?
451
+ _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE ;
452
+ ADD_TO_TRACE (uopcode , max_length );
453
+ ADD_TO_STUB (max_length , SAVE_IP ,
454
+ target_instr - (_Py_CODEUNIT * )code -> co_code_adaptive );
455
+ ADD_TO_STUB (max_length + 1 , EXIT_TRACE , 0 );
456
+ break ;
457
+ }
458
+
429
459
default :
430
460
{
431
461
const struct opcode_macro_expansion * expansion = & _PyOpcode_macro_expansion [opcode ];
@@ -503,6 +533,30 @@ translate_bytecode_to_trace(
503
533
code -> co_firstlineno ,
504
534
2 * (long )(initial_instr - (_Py_CODEUNIT * )code -> co_code_adaptive ),
505
535
trace_length );
536
+ if (max_length < buffer_size && trace_length < max_length ) {
537
+ // Move the stubs back to be immediately after the main trace
538
+ // (which ends at trace_length)
539
+ DPRINTF (2 ,
540
+ "Moving %d stub uops back by %d\n" ,
541
+ buffer_size - max_length ,
542
+ max_length - trace_length );
543
+ memmove (trace + trace_length ,
544
+ trace + max_length ,
545
+ (buffer_size - max_length ) * sizeof (_PyUOpInstruction ));
546
+ // Patch up the jump targets
547
+ for (int i = 0 ; i < trace_length ; i ++ ) {
548
+ if (trace [i ].opcode == _POP_JUMP_IF_FALSE ||
549
+ trace [i ].opcode == _POP_JUMP_IF_TRUE )
550
+ {
551
+ int target = trace [i ].operand ;
552
+ if (target >= max_length ) {
553
+ target += trace_length - max_length ;
554
+ trace [i ].operand = target ;
555
+ }
556
+ }
557
+ }
558
+ trace_length += buffer_size - max_length ;
559
+ }
506
560
return trace_length ;
507
561
}
508
562
else {
@@ -539,6 +593,9 @@ uop_optimize(
539
593
}
540
594
executor -> base .execute = _PyUopExecute ;
541
595
memcpy (executor -> trace , trace , trace_length * sizeof (_PyUOpInstruction ));
596
+ if (trace_length < _Py_UOP_MAX_TRACE_LENGTH ) {
597
+ executor -> trace [trace_length ].opcode = 0 ; // Sentinel
598
+ }
542
599
* exec_ptr = (_PyExecutorObject * )executor ;
543
600
return 1 ;
544
601
}
0 commit comments