Skip to content

Commit 0ed91a2

Browse files
authored
gh-90213: Speed up right shifts of negative integers (GH-30277)
1 parent 4b297a9 commit 0ed91a2

File tree

3 files changed

+75
-29
lines changed

3 files changed

+75
-29
lines changed

Lib/test/test_long.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,10 @@ def test_medium_rshift(self):
985985
self.assertEqual((-1122) >> 9, -3)
986986
self.assertEqual(2**128 >> 9, 2**119)
987987
self.assertEqual(-2**128 >> 9, -2**119)
988+
# Exercise corner case of the current algorithm, where the result of
989+
# shifting a two-limb int by the limb size still has two limbs.
990+
self.assertEqual((1 - BASE*BASE) >> SHIFT, -BASE)
991+
self.assertEqual((BASE - 1 - BASE*BASE) >> SHIFT, -BASE)
988992

989993
def test_big_rshift(self):
990994
self.assertEqual(42 >> 32, 0)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up right shift of negative integers, by removing unnecessary creation
2+
of temporaries. Original patch by Xinhang Xu, reworked by Mark Dickinson.

Objects/longobject.c

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4688,13 +4688,23 @@ divmod_shift(PyObject *shiftby, Py_ssize_t *wordshift, digit *remshift)
46884688
return 0;
46894689
}
46904690

4691+
/* Inner function for both long_rshift and _PyLong_Rshift, shifting an
4692+
integer right by PyLong_SHIFT*wordshift + remshift bits.
4693+
wordshift should be nonnegative. */
4694+
46914695
static PyObject *
46924696
long_rshift1(PyLongObject *a, Py_ssize_t wordshift, digit remshift)
46934697
{
46944698
PyLongObject *z = NULL;
4695-
Py_ssize_t newsize, hishift, i, j;
4699+
Py_ssize_t newsize, hishift, size_a;
46964700
twodigits accum;
4701+
int a_negative;
4702+
4703+
/* Total number of bits shifted must be nonnegative. */
4704+
assert(wordshift >= 0);
4705+
assert(remshift < PyLong_SHIFT);
46974706

4707+
/* Fast path for small a. */
46984708
if (IS_MEDIUM_VALUE(a)) {
46994709
stwodigits m, x;
47004710
digit shift;
@@ -4704,37 +4714,67 @@ long_rshift1(PyLongObject *a, Py_ssize_t wordshift, digit remshift)
47044714
return _PyLong_FromSTwoDigits(x);
47054715
}
47064716

4707-
if (Py_SIZE(a) < 0) {
4708-
/* Right shifting negative numbers is harder */
4709-
PyLongObject *a1, *a2;
4710-
a1 = (PyLongObject *) long_invert(a);
4711-
if (a1 == NULL)
4712-
return NULL;
4713-
a2 = (PyLongObject *) long_rshift1(a1, wordshift, remshift);
4714-
Py_DECREF(a1);
4715-
if (a2 == NULL)
4716-
return NULL;
4717-
z = (PyLongObject *) long_invert(a2);
4718-
Py_DECREF(a2);
4717+
a_negative = Py_SIZE(a) < 0;
4718+
size_a = Py_ABS(Py_SIZE(a));
4719+
4720+
if (a_negative) {
4721+
/* For negative 'a', adjust so that 0 < remshift <= PyLong_SHIFT,
4722+
while keeping PyLong_SHIFT*wordshift + remshift the same. This
4723+
ensures that 'newsize' is computed correctly below. */
4724+
if (remshift == 0) {
4725+
if (wordshift == 0) {
4726+
/* Can only happen if the original shift was 0. */
4727+
return long_long((PyObject *)a);
4728+
}
4729+
remshift = PyLong_SHIFT;
4730+
--wordshift;
4731+
}
47194732
}
4720-
else {
4721-
newsize = Py_SIZE(a) - wordshift;
4722-
if (newsize <= 0)
4723-
return PyLong_FromLong(0);
4724-
hishift = PyLong_SHIFT - remshift;
4725-
z = _PyLong_New(newsize);
4726-
if (z == NULL)
4727-
return NULL;
4728-
j = wordshift;
4729-
accum = a->ob_digit[j++] >> remshift;
4730-
for (i = 0; j < Py_SIZE(a); i++, j++) {
4731-
accum |= (twodigits)a->ob_digit[j] << hishift;
4732-
z->ob_digit[i] = (digit)(accum & PyLong_MASK);
4733-
accum >>= PyLong_SHIFT;
4733+
4734+
assert(wordshift >= 0);
4735+
newsize = size_a - wordshift;
4736+
if (newsize <= 0) {
4737+
/* Shifting all the bits of 'a' out gives either -1 or 0. */
4738+
return PyLong_FromLong(-a_negative);
4739+
}
4740+
z = _PyLong_New(newsize);
4741+
if (z == NULL) {
4742+
return NULL;
4743+
}
4744+
hishift = PyLong_SHIFT - remshift;
4745+
4746+
accum = a->ob_digit[wordshift];
4747+
if (a_negative) {
4748+
/*
4749+
For a positive integer a and nonnegative shift, we have:
4750+
4751+
(-a) >> shift == -((a + 2**shift - 1) >> shift).
4752+
4753+
In the addition `a + (2**shift - 1)`, the low `wordshift` digits of
4754+
`2**shift - 1` all have value `PyLong_MASK`, so we get a carry out
4755+
from the bottom `wordshift` digits when at least one of the least
4756+
significant `wordshift` digits of `a` is nonzero. Digit `wordshift`
4757+
of `2**shift - 1` has value `PyLong_MASK >> hishift`.
4758+
*/
4759+
Py_SET_SIZE(z, -newsize);
4760+
4761+
digit sticky = 0;
4762+
for (Py_ssize_t j = 0; j < wordshift; j++) {
4763+
sticky |= a->ob_digit[j];
47344764
}
4735-
z->ob_digit[i] = (digit)accum;
4736-
z = maybe_small_long(long_normalize(z));
4765+
accum += (PyLong_MASK >> hishift) + (digit)(sticky != 0);
47374766
}
4767+
4768+
accum >>= remshift;
4769+
for (Py_ssize_t i = 0, j = wordshift + 1; j < size_a; i++, j++) {
4770+
accum += (twodigits)a->ob_digit[j] << hishift;
4771+
z->ob_digit[i] = (digit)(accum & PyLong_MASK);
4772+
accum >>= PyLong_SHIFT;
4773+
}
4774+
assert(accum <= PyLong_MASK);
4775+
z->ob_digit[newsize - 1] = (digit)accum;
4776+
4777+
z = maybe_small_long(long_normalize(z));
47384778
return (PyObject *)z;
47394779
}
47404780

0 commit comments

Comments
 (0)