Skip to content

Commit 09c48a4

Browse files
authored
[mypyc] Inline tagged integer arithmetic and bitwise operations (#17265)
Inline the fast path of various tagged integer operations by using C inline functions. Most of these operations are very quick, so getting rid of the overhead of a C call improves performance significantly. This also enables the C compiler to optimize things more, if there are constant operands, for example. This speeds up an older version of the richards benchmark, which didn't use native integers, by 10% (on CPython 3.12). Even bigger improvements are possible in some microbenchmarks. We didn't do this in the past because of worries about compilation time. However, I couldn't measure an impact on self-compilation speed, and the binary size is only increased by about 0.1%. Work on mypyc/mypyc#757.
1 parent f989414 commit 09c48a4

File tree

2 files changed

+185
-142
lines changed

2 files changed

+185
-142
lines changed

mypyc/lib-rt/CPy.h

+150-12
Original file line numberDiff line numberDiff line change
@@ -129,20 +129,20 @@ Py_ssize_t CPyTagged_AsSsize_t(CPyTagged x);
129129
void CPyTagged_IncRef(CPyTagged x);
130130
void CPyTagged_DecRef(CPyTagged x);
131131
void CPyTagged_XDecRef(CPyTagged x);
132-
CPyTagged CPyTagged_Negate(CPyTagged num);
133-
CPyTagged CPyTagged_Invert(CPyTagged num);
134-
CPyTagged CPyTagged_Add(CPyTagged left, CPyTagged right);
135-
CPyTagged CPyTagged_Subtract(CPyTagged left, CPyTagged right);
136-
CPyTagged CPyTagged_Multiply(CPyTagged left, CPyTagged right);
137-
CPyTagged CPyTagged_FloorDivide(CPyTagged left, CPyTagged right);
138-
CPyTagged CPyTagged_Remainder(CPyTagged left, CPyTagged right);
139-
CPyTagged CPyTagged_And(CPyTagged left, CPyTagged right);
140-
CPyTagged CPyTagged_Or(CPyTagged left, CPyTagged right);
141-
CPyTagged CPyTagged_Xor(CPyTagged left, CPyTagged right);
142-
CPyTagged CPyTagged_Rshift(CPyTagged left, CPyTagged right);
143-
CPyTagged CPyTagged_Lshift(CPyTagged left, CPyTagged right);
132+
144133
bool CPyTagged_IsEq_(CPyTagged left, CPyTagged right);
145134
bool CPyTagged_IsLt_(CPyTagged left, CPyTagged right);
135+
CPyTagged CPyTagged_Negate_(CPyTagged num);
136+
CPyTagged CPyTagged_Invert_(CPyTagged num);
137+
CPyTagged CPyTagged_Add_(CPyTagged left, CPyTagged right);
138+
CPyTagged CPyTagged_Subtract_(CPyTagged left, CPyTagged right);
139+
CPyTagged CPyTagged_Multiply_(CPyTagged left, CPyTagged right);
140+
CPyTagged CPyTagged_FloorDivide_(CPyTagged left, CPyTagged right);
141+
CPyTagged CPyTagged_Remainder_(CPyTagged left, CPyTagged right);
142+
CPyTagged CPyTagged_BitwiseLongOp_(CPyTagged a, CPyTagged b, char op);
143+
CPyTagged CPyTagged_Rshift_(CPyTagged left, CPyTagged right);
144+
CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right);
145+
146146
PyObject *CPyTagged_Str(CPyTagged n);
147147
CPyTagged CPyTagged_FromFloat(double f);
148148
PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base);
@@ -286,6 +286,144 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) {
286286
}
287287
}
288288

289+
// Unary '-'
//
// Fast path: 'num' is a short int whose bit pattern is not "only the top bit
// set". Negating that one pattern would wrap back onto itself, so it is sent
// to the out-of-line CPyTagged_Negate_() together with every non-short value.
static inline CPyTagged CPyTagged_Negate(CPyTagged num) {
    // Build the top-bit pattern in the CPyTagged domain: shifting a signed 1
    // into the sign bit of Py_ssize_t is undefined behavior in C.
    // NOTE(review): assumes CPyTagged is an unsigned type of CPY_INT_BITS
    // bits -- confirm against its typedef.
    const CPyTagged most_negative = (CPyTagged)1 << (CPY_INT_BITS - 1);
    if (likely(CPyTagged_CheckShort(num) && num != most_negative)) {
        // The only possibility of an overflow error happening when negating a
        // short is if we attempt to negate the most negative number.
        return -num;
    }
    return CPyTagged_Negate_(num);
}
298+
299+
// Binary '+'
//
// If both operands are short ints, the sum is computed inline and returned
// unless CPyTagged_IsAddOverflow() rejects it. Every other case is delegated
// to the out-of-line CPyTagged_Add_().
static inline CPyTagged CPyTagged_Add(CPyTagged left, CPyTagged right) {
    // TODO: Use clang/gcc extension __builtin_saddll_overflow instead.
    const bool both_short = CPyTagged_CheckShort(left) && CPyTagged_CheckShort(right);
    if (likely(both_short)) {
        const CPyTagged total = left + right;
        const bool overflowed = CPyTagged_IsAddOverflow(total, left, right);
        if (likely(!overflowed)) {
            return total;
        }
    }
    return CPyTagged_Add_(left, right);
}
309+
310+
// Binary '-'
//
// If both operands are short ints, the difference is computed inline and
// returned unless CPyTagged_IsSubtractOverflow() rejects it. Every other case
// is delegated to the out-of-line CPyTagged_Subtract_().
static inline CPyTagged CPyTagged_Subtract(CPyTagged left, CPyTagged right) {
    // TODO: Use clang/gcc extension __builtin_ssubll_overflow instead.
    const bool both_short = CPyTagged_CheckShort(left) && CPyTagged_CheckShort(right);
    if (likely(both_short)) {
        const CPyTagged delta = left - right;
        const bool overflowed = CPyTagged_IsSubtractOverflow(delta, left, right);
        if (likely(!overflowed)) {
            return delta;
        }
    }
    return CPyTagged_Subtract_(left, right);
}
320+
321+
// Binary '*'
//
// The inline path requires two short operands for which
// CPyTagged_IsMultiplyOverflow() reports no overflow; 'left' is then
// multiplied by the Py_ssize_t form of 'right'. Anything else is handled by
// the out-of-line CPyTagged_Multiply_().
static inline CPyTagged CPyTagged_Multiply(CPyTagged left, CPyTagged right) {
    // TODO: Consider using some clang/gcc extension to check for overflow
    if (CPyTagged_CheckShort(left)
            && CPyTagged_CheckShort(right)
            && !CPyTagged_IsMultiplyOverflow(left, right)) {
        return left * CPyTagged_ShortAsSsize_t(right);
    }
    return CPyTagged_Multiply_(left, right);
}
330+
331+
// Binary '//'
//
// Inline path: both operands are short ints and
// CPyTagged_MaybeFloorDivideFault() does not flag the pair. The quotient comes
// from C's truncating division and is decremented when the operand signs
// differ and the division was inexact, so the result rounds down. All other
// cases go to the out-of-line CPyTagged_FloorDivide_().
static inline CPyTagged CPyTagged_FloorDivide(CPyTagged left, CPyTagged right) {
    if (CPyTagged_CheckShort(left)
        && CPyTagged_CheckShort(right)
        && !CPyTagged_MaybeFloorDivideFault(left, right)) {
        Py_ssize_t result = CPyTagged_ShortAsSsize_t(left) / CPyTagged_ShortAsSsize_t(right);
        if (((Py_ssize_t)left < 0) != (((Py_ssize_t)right) < 0)) {
            // 'result' is a plain quotient while 'left' and 'right' are still
            // in tagged form; the comparison is an exactness check.
            if (result * right != left) {
                // Round down
                result--;
            }
        }
        // Re-tag the quotient. The shift is done on CPyTagged because
        // left-shifting a negative Py_ssize_t is undefined behavior in C.
        // NOTE(review): assumes CPyTagged is unsigned -- confirm against its
        // typedef.
        return (CPyTagged)result << 1;
    }
    return CPyTagged_FloorDivide_(left, right);
}
346+
347+
// Binary '%'
//
// Non-short operands, and pairs flagged by CPyTagged_MaybeRemainderFault(),
// are delegated to the out-of-line CPyTagged_Remainder_(). Otherwise the C
// remainder of the two operands (viewed as Py_ssize_t) is taken and, when it
// is non-zero and the operand signs differ, 'right' is added to it.
static inline CPyTagged CPyTagged_Remainder(CPyTagged left, CPyTagged right) {
    if (!CPyTagged_CheckShort(left) || !CPyTagged_CheckShort(right)
            || CPyTagged_MaybeRemainderFault(left, right)) {
        return CPyTagged_Remainder_(left, right);
    }

    Py_ssize_t rem = (Py_ssize_t)left % (Py_ssize_t)right;
    const bool signs_differ = ((Py_ssize_t)right < 0) != ((Py_ssize_t)left < 0);
    if (signs_differ && rem != 0) {
        rem += right;
    }
    return rem;
}
358+
359+
// Bitwise '~'
360+
static inline CPyTagged CPyTagged_Invert(CPyTagged num) {
361+
if (likely(CPyTagged_CheckShort(num) && num != CPY_TAGGED_ABS_MIN)) {
362+
return ~num & ~CPY_INT_TAG;
363+
}
364+
return CPyTagged_Invert_(num);
365+
}
366+
367+
// Bitwise '&'
368+
static inline CPyTagged CPyTagged_And(CPyTagged left, CPyTagged right) {
369+
if (likely(CPyTagged_CheckShort(left) && CPyTagged_CheckShort(right))) {
370+
return left & right;
371+
}
372+
return CPyTagged_BitwiseLongOp_(left, right, '&');
373+
}
374+
375+
// Bitwise '|'
376+
static inline CPyTagged CPyTagged_Or(CPyTagged left, CPyTagged right) {
377+
if (likely(CPyTagged_CheckShort(left) && CPyTagged_CheckShort(right))) {
378+
return left | right;
379+
}
380+
return CPyTagged_BitwiseLongOp_(left, right, '|');
381+
}
382+
383+
// Bitwise '^'
384+
static inline CPyTagged CPyTagged_Xor(CPyTagged left, CPyTagged right) {
385+
if (likely(CPyTagged_CheckShort(left) && CPyTagged_CheckShort(right))) {
386+
return left ^ right;
387+
}
388+
return CPyTagged_BitwiseLongOp_(left, right, '^');
389+
}
390+
391+
// Bitwise '>>'
392+
static inline CPyTagged CPyTagged_Rshift(CPyTagged left, CPyTagged right) {
393+
if (likely(CPyTagged_CheckShort(left)
394+
&& CPyTagged_CheckShort(right)
395+
&& (Py_ssize_t)right >= 0)) {
396+
CPyTagged count = CPyTagged_ShortAsSsize_t(right);
397+
if (unlikely(count >= CPY_INT_BITS)) {
398+
if ((Py_ssize_t)left >= 0) {
399+
return 0;
400+
} else {
401+
return CPyTagged_ShortFromInt(-1);
402+
}
403+
}
404+
return ((Py_ssize_t)left >> count) & ~CPY_INT_TAG;
405+
}
406+
return CPyTagged_Rshift_(left, right);
407+
}
408+
409+
// Return true if shifting 'short_int' left by 'shift' bits and then back right
// by the same amount does not reproduce 'short_int', i.e. the left shift lost
// information.
//
// 'shift' must be non-negative and smaller than the bit width of the operand;
// this function does not check that itself.
static inline bool IsShortLshiftOverflow(Py_ssize_t short_int, Py_ssize_t shift) {
    // Do the left shift on CPyTagged: left-shifting a negative (or
    // overflowing) Py_ssize_t is undefined behavior in C, and overflow is
    // exactly the case this function has to detect.
    // NOTE(review): assumes CPyTagged is an unsigned type with the same width
    // as Py_ssize_t -- confirm against its typedef.
    Py_ssize_t shifted = (Py_ssize_t)((CPyTagged)short_int << shift);
    return (shifted >> shift) != short_int;
}
412+
413+
// Bitwise '<<'
414+
static inline CPyTagged CPyTagged_Lshift(CPyTagged left, CPyTagged right) {
415+
if (likely(CPyTagged_CheckShort(left)
416+
&& CPyTagged_CheckShort(right)
417+
&& (Py_ssize_t)right >= 0
418+
&& right < CPY_INT_BITS * 2)) {
419+
CPyTagged shift = CPyTagged_ShortAsSsize_t(right);
420+
if (!IsShortLshiftOverflow(left, shift))
421+
// Short integers, no overflow
422+
return left << shift;
423+
}
424+
return CPyTagged_Lshift_(left, right);
425+
}
426+
289427

290428
// Float operations
291429

0 commit comments

Comments
 (0)