Skip to content

Commit 31faa43

Browse files
authored
[mypyc] Inline fast paths of integer unboxing operations (#17266)
This applies to `int` and native integer types. This speeds up the following micro-benchmark by up to 80% (it spends most of its time unboxing integers):

```
# a is list[int]/list[i64]/...
for i in a:
    if i == 789:
        n += 1
```

The impact on compile time when self-compiling is below the noise floor. The generated binary is about 0.1% larger. Since integer unboxing can be performance-critical, this seems like a decent win. Closes mypyc/mypyc#987. Work on mypyc/mypyc#757.
1 parent b81b9e0 commit 31faa43

File tree

6 files changed

+269
-192
lines changed

6 files changed

+269
-192
lines changed

mypyc/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
"exc_ops.c",
8080
"misc_ops.c",
8181
"generic_ops.c",
82+
"pythonsupport.c",
8283
]
8384

8485

mypyc/lib-rt/CPy.h

Lines changed: 140 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,6 @@ static inline size_t CPy_FindAttrOffset(PyTypeObject *trait, CPyVTableItem *vtab
120120
CPyTagged CPyTagged_FromSsize_t(Py_ssize_t value);
121121
CPyTagged CPyTagged_FromVoidPtr(void *ptr);
122122
CPyTagged CPyTagged_FromInt64(int64_t value);
123-
CPyTagged CPyTagged_FromObject(PyObject *object);
124-
CPyTagged CPyTagged_StealFromObject(PyObject *object);
125-
CPyTagged CPyTagged_BorrowFromObject(PyObject *object);
126123
PyObject *CPyTagged_AsObject(CPyTagged x);
127124
PyObject *CPyTagged_StealAsObject(CPyTagged x);
128125
Py_ssize_t CPyTagged_AsSsize_t(CPyTagged x);
@@ -148,18 +145,18 @@ CPyTagged CPyTagged_FromFloat(double f);
148145
PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base);
149146
PyObject *CPyLong_FromStr(PyObject *o);
150147
PyObject *CPyBool_Str(bool b);
151-
int64_t CPyLong_AsInt64(PyObject *o);
148+
int64_t CPyLong_AsInt64_(PyObject *o);
152149
int64_t CPyInt64_Divide(int64_t x, int64_t y);
153150
int64_t CPyInt64_Remainder(int64_t x, int64_t y);
154-
int32_t CPyLong_AsInt32(PyObject *o);
151+
int32_t CPyLong_AsInt32_(PyObject *o);
155152
int32_t CPyInt32_Divide(int32_t x, int32_t y);
156153
int32_t CPyInt32_Remainder(int32_t x, int32_t y);
157154
void CPyInt32_Overflow(void);
158-
int16_t CPyLong_AsInt16(PyObject *o);
155+
int16_t CPyLong_AsInt16_(PyObject *o);
159156
int16_t CPyInt16_Divide(int16_t x, int16_t y);
160157
int16_t CPyInt16_Remainder(int16_t x, int16_t y);
161158
void CPyInt16_Overflow(void);
162-
uint8_t CPyLong_AsUInt8(PyObject *o);
159+
uint8_t CPyLong_AsUInt8_(PyObject *o);
163160
void CPyUInt8_Overflow(void);
164161
double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y);
165162

@@ -199,6 +196,41 @@ static inline PyObject *CPyTagged_LongAsObject(CPyTagged x) {
199196
return (PyObject *)(x & ~CPY_INT_TAG);
200197
}
201198

199+
// Convert a Python int object to a tagged integer without consuming the
// caller's reference to 'object'.
//
// When CPyLong_AsSsize_tAndOverflow reports no overflow, the value is
// returned shifted left by one bit. On overflow, a new reference to
// 'object' is taken and the pointer is returned with CPY_INT_TAG set.
static inline CPyTagged CPyTagged_FromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        // The returned tagged pointer owns its own reference.
        Py_INCREF(object);
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Cast before shifting: left-shifting a negative signed value is
        // undefined behavior in C.
        // NOTE(review): assumes CPyTagged is an unsigned type -- confirm
        // against its typedef.
        return ((CPyTagged)value) << 1;
    }
}
210+
211+
// Convert a Python int object to a tagged integer, taking over the caller's
// reference to 'object'.
//
// On overflow the pointer is returned with CPY_INT_TAG set and no reference
// count change is made. Otherwise the reference to 'object' is released and
// the value is returned shifted left by one bit.
static inline CPyTagged CPyTagged_StealFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // 'value' was already extracted, so the object is no longer needed.
        Py_DECREF(object);
        // Cast before shifting: left-shifting a negative signed value is
        // undefined behavior in C.
        // NOTE(review): assumes CPyTagged is an unsigned type -- confirm
        // against its typedef.
        return ((CPyTagged)value) << 1;
    }
}
222+
223+
// Convert a Python int object to a tagged integer without touching the
// reference count of 'object' on either path.
//
// On overflow the pointer is returned with CPY_INT_TAG set; otherwise the
// value is returned shifted left by one bit.
static inline CPyTagged CPyTagged_BorrowFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Cast before shifting: left-shifting a negative signed value is
        // undefined behavior in C.
        // NOTE(review): assumes CPyTagged is an unsigned type -- confirm
        // against its typedef.
        return ((CPyTagged)value) << 1;
    }
}
233+
202234
static inline bool CPyTagged_TooBig(Py_ssize_t value) {
203235
// Micro-optimized for the common case where it fits.
204236
return (size_t)value > CPY_TAGGED_MAX
@@ -286,6 +318,107 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) {
286318
}
287319
}
288320

321+
// Unbox a Python object to i64.
//
// Two common cases are handled inline for int objects: the one-digit case
// returns digit 0 directly, and the zero case returns 0. Everything else
// (including objects that fail PyLong_Check) is delegated to the
// out-of-line slow path CPyLong_AsInt64_.
static inline int64_t CPyLong_AsInt64(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *lobj = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        // Inspect the tag the same way the i32/i16/u8 unboxing helpers in
        // this header do, instead of relying on Py_SIZE(): with the 3.12
        // int layout the size field no longer holds a plain digit count,
        // so a Py_SIZE()-based check would miss the one-digit fast path.
        size_t tag = CPY_LONG_TAG(lobj);
        if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
            // Fast path
            return CPY_LONG_DIGIT(lobj, 0);
        } else if (likely(tag == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t size = Py_SIZE(lobj);
        if (likely(size == 1)) {
            // Fast path
            return CPY_LONG_DIGIT(lobj, 0);
        } else if (likely(size == 0)) {
            return 0;
        }
#endif
    }
    // Slow path
    return CPyLong_AsInt64_(o);
}
335+
336+
// Unbox a Python object to i32.
//
// Int objects in the one-digit case or the zero case are resolved inline;
// every other input is forwarded to the out-of-line CPyLong_AsInt32_.
static inline int32_t CPyLong_AsInt32(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *long_obj = (PyLongObject *)o;
        // Pick the size key and the two values it is compared against
        // according to the int layout in use.
#if CPY_3_12_FEATURES
        size_t size_key = CPY_LONG_TAG(long_obj);
        const size_t one_digit_key = 1 << CPY_NON_SIZE_BITS;
        const size_t zero_key = CPY_SIGN_ZERO;
#else
        Py_ssize_t size_key = long_obj->ob_base.ob_size;
        const Py_ssize_t one_digit_key = 1;
        const Py_ssize_t zero_key = 0;
#endif
        if (likely(size_key == one_digit_key)) {
            // Inline case: the value is just the first digit.
            return CPY_LONG_DIGIT(long_obj, 0);
        } else if (likely(size_key == zero_key)) {
            return 0;
        }
    }
    // Everything else takes the out-of-line path.
    return CPyLong_AsInt32_(o);
}
361+
362+
// Unbox a Python object to i16.
//
// Int objects in the one-digit case whose digit is at most 0x7fff, and int
// objects in the zero case, are resolved inline. Every other input
// (including one-digit values above 0x7fff) is forwarded to the out-of-line
// CPyLong_AsInt16_.
static inline int16_t CPyLong_AsInt16(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *long_obj = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        size_t long_tag = CPY_LONG_TAG(long_obj);
        if (likely(long_tag == (1 << CPY_NON_SIZE_BITS))) {
            // Inline case: accept the digit only if it fits the target type.
            digit first_digit = CPY_LONG_DIGIT(long_obj, 0);
            if (first_digit <= 0x7fff) {
                return first_digit;
            }
        } else if (likely(long_tag == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t digit_count = long_obj->ob_base.ob_size;
        if (likely(digit_count == 1)) {
            // Inline case: accept the digit only if it fits the target type.
            digit first_digit = long_obj->ob_digit[0];
            if (first_digit <= 0x7fff) {
                return first_digit;
            }
        } else if (likely(digit_count == 0)) {
            return 0;
        }
#endif
    }
    // Everything else takes the out-of-line path.
    return CPyLong_AsInt16_(o);
}
391+
392+
// Unbox a Python object to u8.
//
// Int objects in the one-digit case whose digit is at most 0xff, and int
// objects in the zero case, are resolved inline. Every other input
// (including one-digit values above 0xff) is forwarded to the out-of-line
// CPyLong_AsUInt8_.
static inline uint8_t CPyLong_AsUInt8(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *long_obj = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        size_t long_tag = CPY_LONG_TAG(long_obj);
        if (likely(long_tag == (1 << CPY_NON_SIZE_BITS))) {
            // Inline case: accept the digit only if it fits the target type.
            digit first_digit = CPY_LONG_DIGIT(long_obj, 0);
            if (first_digit <= 0xff) {
                return first_digit;
            }
        } else if (likely(long_tag == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t digit_count = long_obj->ob_base.ob_size;
        if (likely(digit_count == 1)) {
            // Inline case: accept the digit only if it fits the target type.
            digit first_digit = long_obj->ob_digit[0];
            if (first_digit <= 0xff) {
                return first_digit;
            }
        } else if (likely(digit_count == 0)) {
            return 0;
        }
#endif
    }
    // Everything else takes the out-of-line path.
    return CPyLong_AsUInt8_(o);
}
421+
289422
static inline CPyTagged CPyTagged_Negate(CPyTagged num) {
290423
if (likely(CPyTagged_CheckShort(num)
291424
&& num != (CPyTagged) ((Py_ssize_t)1 << (CPY_INT_BITS - 1)))) {

mypyc/lib-rt/int_ops.c

Lines changed: 8 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -44,41 +44,6 @@ CPyTagged CPyTagged_FromInt64(int64_t value) {
4444
}
4545
}
4646

47-
CPyTagged CPyTagged_FromObject(PyObject *object) {
48-
int overflow;
49-
// The overflow check knows about CPyTagged's width
50-
Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
51-
if (unlikely(overflow != 0)) {
52-
Py_INCREF(object);
53-
return ((CPyTagged)object) | CPY_INT_TAG;
54-
} else {
55-
return value << 1;
56-
}
57-
}
58-
59-
CPyTagged CPyTagged_StealFromObject(PyObject *object) {
60-
int overflow;
61-
// The overflow check knows about CPyTagged's width
62-
Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
63-
if (unlikely(overflow != 0)) {
64-
return ((CPyTagged)object) | CPY_INT_TAG;
65-
} else {
66-
Py_DECREF(object);
67-
return value << 1;
68-
}
69-
}
70-
71-
CPyTagged CPyTagged_BorrowFromObject(PyObject *object) {
72-
int overflow;
73-
// The overflow check knows about CPyTagged's width
74-
Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
75-
if (unlikely(overflow != 0)) {
76-
return ((CPyTagged)object) | CPY_INT_TAG;
77-
} else {
78-
return value << 1;
79-
}
80-
}
81-
8247
PyObject *CPyTagged_AsObject(CPyTagged x) {
8348
PyObject *value;
8449
if (unlikely(CPyTagged_CheckLong(x))) {
@@ -420,18 +385,8 @@ CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right) {
420385
return CPyTagged_StealFromObject(result);
421386
}
422387

423-
int64_t CPyLong_AsInt64(PyObject *o) {
424-
if (likely(PyLong_Check(o))) {
425-
PyLongObject *lobj = (PyLongObject *)o;
426-
Py_ssize_t size = Py_SIZE(lobj);
427-
if (likely(size == 1)) {
428-
// Fast path
429-
return CPY_LONG_DIGIT(lobj, 0);
430-
} else if (likely(size == 0)) {
431-
return 0;
432-
}
433-
}
434-
// Slow path
388+
// i64 unboxing slow path
389+
int64_t CPyLong_AsInt64_(PyObject *o) {
435390
int overflow;
436391
int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow);
437392
if (result == -1) {
@@ -479,29 +434,8 @@ int64_t CPyInt64_Remainder(int64_t x, int64_t y) {
479434
return d;
480435
}
481436

482-
int32_t CPyLong_AsInt32(PyObject *o) {
483-
if (likely(PyLong_Check(o))) {
484-
#if CPY_3_12_FEATURES
485-
PyLongObject *lobj = (PyLongObject *)o;
486-
size_t tag = CPY_LONG_TAG(lobj);
487-
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
488-
// Fast path
489-
return CPY_LONG_DIGIT(lobj, 0);
490-
} else if (likely(tag == CPY_SIGN_ZERO)) {
491-
return 0;
492-
}
493-
#else
494-
PyLongObject *lobj = (PyLongObject *)o;
495-
Py_ssize_t size = lobj->ob_base.ob_size;
496-
if (likely(size == 1)) {
497-
// Fast path
498-
return CPY_LONG_DIGIT(lobj, 0);
499-
} else if (likely(size == 0)) {
500-
return 0;
501-
}
502-
#endif
503-
}
504-
// Slow path
437+
// i32 unboxing slow path
438+
int32_t CPyLong_AsInt32_(PyObject *o) {
505439
int overflow;
506440
long result = PyLong_AsLongAndOverflow(o, &overflow);
507441
if (result > 0x7fffffffLL || result < -0x80000000LL) {
@@ -557,33 +491,8 @@ void CPyInt32_Overflow() {
557491
PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32");
558492
}
559493

560-
int16_t CPyLong_AsInt16(PyObject *o) {
561-
if (likely(PyLong_Check(o))) {
562-
#if CPY_3_12_FEATURES
563-
PyLongObject *lobj = (PyLongObject *)o;
564-
size_t tag = CPY_LONG_TAG(lobj);
565-
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
566-
// Fast path
567-
digit x = CPY_LONG_DIGIT(lobj, 0);
568-
if (x < 0x8000)
569-
return x;
570-
} else if (likely(tag == CPY_SIGN_ZERO)) {
571-
return 0;
572-
}
573-
#else
574-
PyLongObject *lobj = (PyLongObject *)o;
575-
Py_ssize_t size = lobj->ob_base.ob_size;
576-
if (likely(size == 1)) {
577-
// Fast path
578-
digit x = lobj->ob_digit[0];
579-
if (x < 0x8000)
580-
return x;
581-
} else if (likely(size == 0)) {
582-
return 0;
583-
}
584-
#endif
585-
}
586-
// Slow path
494+
// i16 unboxing slow path
495+
int16_t CPyLong_AsInt16_(PyObject *o) {
587496
int overflow;
588497
long result = PyLong_AsLongAndOverflow(o, &overflow);
589498
if (result > 0x7fff || result < -0x8000) {
@@ -639,34 +548,8 @@ void CPyInt16_Overflow() {
639548
PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16");
640549
}
641550

642-
643-
uint8_t CPyLong_AsUInt8(PyObject *o) {
644-
if (likely(PyLong_Check(o))) {
645-
#if CPY_3_12_FEATURES
646-
PyLongObject *lobj = (PyLongObject *)o;
647-
size_t tag = CPY_LONG_TAG(lobj);
648-
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
649-
// Fast path
650-
digit x = CPY_LONG_DIGIT(lobj, 0);
651-
if (x < 256)
652-
return x;
653-
} else if (likely(tag == CPY_SIGN_ZERO)) {
654-
return 0;
655-
}
656-
#else
657-
PyLongObject *lobj = (PyLongObject *)o;
658-
Py_ssize_t size = lobj->ob_base.ob_size;
659-
if (likely(size == 1)) {
660-
// Fast path
661-
digit x = lobj->ob_digit[0];
662-
if (x < 256)
663-
return x;
664-
} else if (likely(size == 0)) {
665-
return 0;
666-
}
667-
#endif
668-
}
669-
// Slow path
551+
// u8 unboxing slow path
552+
uint8_t CPyLong_AsUInt8_(PyObject *o) {
670553
int overflow;
671554
long result = PyLong_AsLongAndOverflow(o, &overflow);
672555
if (result < 0 || result >= 256) {

0 commit comments

Comments
 (0)