Skip to content

Commit 7861dfd

Browse files
authored
gh-111140: Adds PyLong_AsNativeBytes and PyLong_FromNative[Unsigned]Bytes functions (GH-114886)
1 parent a82fbc1 commit 7861dfd

File tree

14 files changed

+533
-26
lines changed

14 files changed

+533
-26
lines changed

Doc/c-api/long.rst

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,28 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate.
113113
retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`.
114114
115115
116+
.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness)
117+
118+
Create a Python integer from the value contained in the first *n_bytes* of
119+
*buffer*, interpreted as a two's-complement signed number.
120+
121+
*endianness* may be passed ``-1`` for the native endian that CPython was
122+
compiled with, or else ``0`` for big endian and ``1`` for little.
123+
124+
.. versionadded:: 3.13
125+
126+
127+
.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness)
128+
129+
Create a Python integer from the value contained in the first *n_bytes* of
130+
*buffer*, interpreted as an unsigned number.
131+
132+
*endianness* may be passed ``-1`` for the native endian that CPython was
133+
compiled with, or else ``0`` for big endian and ``1`` for little.
134+
135+
.. versionadded:: 3.13
136+
137+
116138
.. XXX alias PyLong_AS_LONG (for now)
117139
.. c:function:: long PyLong_AsLong(PyObject *obj)
118140
@@ -332,6 +354,50 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate.
332354
Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate.
333355
334356
357+
.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness)
358+
359+
Copy the Python integer value to a native *buffer* of size *n_bytes*::
360+
361+
int value;
362+
Py_ssize_t bytes = PyLong_AsNativeBytes(pylong, &value, sizeof(value), -1);
363+
if (bytes < 0) {
364+
// Error occurred
365+
return NULL;
366+
}
367+
else if (bytes > sizeof(value)) {
368+
// Overflow occurred, but 'value' contains as much as could fit
369+
}
370+
371+
*endianness* may be passed ``-1`` for the native endian that CPython was
372+
compiled with, or ``0`` for big endian and ``1`` for little.
373+
374+
Return ``-1`` with an exception raised if *pylong* cannot be interpreted as
375+
an integer. Otherwise, return the size of the buffer required to store the
376+
value. If this is equal to or less than *n_bytes*, the entire value was
377+
copied.
378+
379+
Unless an exception is raised, all *n_bytes* of the buffer will be written
380+
with as much of the value as can fit. This allows the caller to ignore all
381+
non-negative results if the intent is to match the typical behavior of a
382+
C-style downcast.
383+
384+
Values are always copied as two's-complement, and sufficient size will be
385+
requested for a sign bit. For example, this may cause a value that fits into
386+
8 bytes when treated as unsigned to request 9 bytes, even though all eight
387+
bytes were copied into the buffer. What has been omitted is the zero sign
388+
bit, which is redundant when the intention is to treat the value as unsigned.
389+
390+
Passing *n_bytes* of zero will always return the requested buffer size.
391+
392+
.. note::
393+
394+
When the value does not fit in the provided buffer, the requested size
395+
returned from the function may be larger than necessary. Passing 0 to this
396+
function is not an accurate way to determine the bit length of a value.
397+
398+
.. versionadded:: 3.13
399+
400+
335401
.. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op)
336402
337403
Return 1 if *op* is compact, 0 otherwise.

Doc/whatsnew/3.13.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ Tier 2 IR by Mark Shannon and Guido van Rossum.
587587
Tier 2 optimizer by Ken Jin.)
588588

589589

590+
590591
Deprecated
591592
==========
592593

@@ -1526,6 +1527,11 @@ New Features
15261527

15271528
(Contributed by Victor Stinner and Petr Viktorin in :gh:`110850`.)
15281529

1530+
* Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and
1531+
:c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting
1532+
between native integer types and Python :class:`int` objects.
1533+
(Contributed by Steve Dower in :gh:`111140`.)
1534+
15291535

15301536
Porting to Python 3.13
15311537
----------------------
@@ -1585,7 +1591,6 @@ Porting to Python 3.13
15851591
platforms, the ``HAVE_STDDEF_H`` macro is only defined on Windows.
15861592
(Contributed by Victor Stinner in :gh:`108765`.)
15871593

1588-
15891594
Deprecated
15901595
----------
15911596

Include/cpython/longobject.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,40 @@
44

55
PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base);
66

7+
/* PyLong_AsNativeBytes: Copy the integer value to a native variable.
8+
buffer points to the first byte of the variable.
9+
n_bytes is the number of bytes available in the buffer. Pass 0 to request
10+
the required size for the value.
11+
endianness is -1 for native endian, 0 for big endian or 1 for little.
12+
Big endian mode will write the most significant byte into the address
13+
directly referenced by buffer; little endian will write the least significant
14+
byte into that address.
15+
16+
If an exception is raised, returns a negative value.
17+
Otherwise, returns the number of bytes that are required to store the value.
18+
To check that the full value is represented, ensure that the return value is
19+
equal to or less than n_bytes.
20+
All n_bytes are guaranteed to be written (unless an exception occurs), and
21+
so ignoring a positive return value is the equivalent of a downcast in C.
22+
In cases where the full value could not be represented, the returned value
23+
may be larger than necessary - this function is not an accurate way to
24+
calculate the bit length of an integer object.
25+
*/
26+
PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer,
27+
Py_ssize_t n_bytes, int endianness);
28+
29+
/* PyLong_FromNativeBytes: Create an int value from a native integer
30+
n_bytes is the number of bytes to read from the buffer. Passing 0 will
31+
always produce the zero int.
32+
PyLong_FromUnsignedNativeBytes always produces a non-negative int.
33+
endianness is -1 for native endian, 0 for big endian or 1 for little.
34+
35+
Returns the int object, or NULL with an exception set. */
36+
PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes,
37+
int endianness);
38+
PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer,
39+
size_t n_bytes, int endianness);
40+
741
PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op);
842
PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op);
943

@@ -50,7 +84,7 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray(
5084
*/
5185
PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
5286
unsigned char* bytes, size_t n,
53-
int little_endian, int is_signed);
87+
int little_endian, int is_signed, int with_exceptions);
5488

5589
/* For use by the gcd function in mathmodule.c */
5690
PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *);

Lib/test/test_capi/test_long.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import unittest
22
import sys
3+
import test.support as support
34

45
from test.support import import_helper
56

@@ -423,6 +424,150 @@ def test_long_asvoidptr(self):
423424
self.assertRaises(OverflowError, asvoidptr, -2**1000)
424425
# CRASHES asvoidptr(NULL)
425426

427+
def test_long_asnativebytes(self):
428+
import math
429+
from _testcapi import (
430+
pylong_asnativebytes as asnativebytes,
431+
SIZE_MAX,
432+
)
433+
434+
# Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot
435+
SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8)
436+
MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1
437+
MAX_USIZE = 2 ** (SZ * 8) - 1
438+
if support.verbose:
439+
print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}")
440+
441+
# These tests check that the requested buffer size is correct
442+
for v, expect in [
443+
(0, SZ),
444+
(512, SZ),
445+
(-512, SZ),
446+
(MAX_SSIZE, SZ),
447+
(MAX_USIZE, SZ + 1),
448+
(-MAX_SSIZE, SZ),
449+
(-MAX_USIZE, SZ + 1),
450+
(2**255-1, 32),
451+
(-(2**255-1), 32),
452+
(2**256-1, 33),
453+
(-(2**256-1), 33),
454+
]:
455+
with self.subTest(f"sizeof-{v:X}"):
456+
buffer = bytearray(1)
457+
self.assertEqual(expect, asnativebytes(v, buffer, 0, -1),
458+
"PyLong_AsNativeBytes(v, NULL, 0, -1)")
459+
# Also check via the __index__ path
460+
self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1),
461+
"PyLong_AsNativeBytes(Index(v), NULL, 0, -1)")
462+
463+
# We request as many bytes as `expect_be` contains, and always check
464+
# the result (both big and little endian). We check the return value
465+
# independently, since the buffer should always be filled correctly even
466+
# if we need more bytes
467+
for v, expect_be, expect_n in [
468+
(0, b'\x00', 1),
469+
(0, b'\x00' * 2, 2),
470+
(0, b'\x00' * 8, min(8, SZ)),
471+
(1, b'\x01', 1),
472+
(1, b'\x00' * 10 + b'\x01', min(11, SZ)),
473+
(42, b'\x2a', 1),
474+
(42, b'\x00' * 10 + b'\x2a', min(11, SZ)),
475+
(-1, b'\xff', 1),
476+
(-1, b'\xff' * 10, min(11, SZ)),
477+
(-42, b'\xd6', 1),
478+
(-42, b'\xff' * 10 + b'\xd6', min(11, SZ)),
479+
# Extracts 255 into a single byte, but requests sizeof(Py_ssize_t)
480+
(255, b'\xff', SZ),
481+
(255, b'\x00\xff', 2),
482+
(256, b'\x01\x00', 2),
483+
# Extracts successfully (unsigned), but requests 9 bytes
484+
(2**63, b'\x80' + b'\x00' * 7, 9),
485+
# "Extracts", but requests 9 bytes
486+
(-2**63, b'\x80' + b'\x00' * 7, 9),
487+
(2**63, b'\x00\x80' + b'\x00' * 7, 9),
488+
(-2**63, b'\xff\x80' + b'\x00' * 7, 9),
489+
490+
(2**255-1, b'\x7f' + b'\xff' * 31, 32),
491+
(-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32),
492+
# Request extra bytes, but result says we only needed 32
493+
(-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', 32),
494+
(-(2**255-1), b'\xff\xff\x80' + b'\x00' * 30 + b'\x01', 32),
495+
496+
# Extracting 256 bits of integer will request 33 bytes, but still
497+
# copy as many bits as possible into the buffer. So we *can* copy
498+
# into a 32-byte buffer, though negative numbers may be unrecoverable
499+
(2**256-1, b'\xff' * 32, 33),
500+
(2**256-1, b'\x00' + b'\xff' * 32, 33),
501+
(-(2**256-1), b'\x00' * 31 + b'\x01', 33),
502+
(-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33),
503+
(-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33),
504+
505+
# The classic "Windows HRESULT as negative number" case
506+
# HRESULT hr;
507+
# PyLong_AsNativeBytes(<-2147467259>, &hr, sizeof(HRESULT), -1)
508+
# assert(hr == E_FAIL)
509+
(-2147467259, b'\x80\x00\x40\x05', 4),
510+
]:
511+
with self.subTest(f"{v:X}-{len(expect_be)}bytes"):
512+
n = len(expect_be)
513+
buffer = bytearray(n)
514+
expect_le = expect_be[::-1]
515+
516+
self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0),
517+
f"PyLong_AsNativeBytes(v, buffer, {n}, <big>)")
518+
self.assertEqual(expect_be, buffer[:n], "<big>")
519+
self.assertEqual(expect_n, asnativebytes(v, buffer, n, 1),
520+
f"PyLong_AsNativeBytes(v, buffer, {n}, <little>)")
521+
self.assertEqual(expect_le, buffer[:n], "<little>")
522+
523+
# Check a few error conditions. These are validated in code, but are
524+
# unspecified in docs, so if we make changes to the implementation, it's
525+
# fine to just update these tests rather than preserve the behaviour.
526+
with self.assertRaises(SystemError):
527+
asnativebytes(1, buffer, 0, 2)
528+
with self.assertRaises(TypeError):
529+
asnativebytes('not a number', buffer, 0, -1)
530+
531+
def test_long_fromnativebytes(self):
532+
import math
533+
from _testcapi import (
534+
pylong_fromnativebytes as fromnativebytes,
535+
SIZE_MAX,
536+
)
537+
538+
# Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot
539+
SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8)
540+
MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1
541+
MAX_USIZE = 2 ** (SZ * 8) - 1
542+
543+
for v_be, expect_s, expect_u in [
544+
(b'\x00', 0, 0),
545+
(b'\x01', 1, 1),
546+
(b'\xff', -1, 255),
547+
(b'\x00\xff', 255, 255),
548+
(b'\xff\xff', -1, 65535),
549+
]:
550+
with self.subTest(f"{expect_s}-{expect_u:X}-{len(v_be)}bytes"):
551+
n = len(v_be)
552+
v_le = v_be[::-1]
553+
554+
self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 1),
555+
f"PyLong_FromNativeBytes(buffer, {n}, <big>)")
556+
self.assertEqual(expect_s, fromnativebytes(v_le, n, 1, 1),
557+
f"PyLong_FromNativeBytes(buffer, {n}, <little>)")
558+
self.assertEqual(expect_u, fromnativebytes(v_be, n, 0, 0),
559+
f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <big>)")
560+
self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0),
561+
f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <little>)")
562+
563+
# Check native endian when the result would be the same either
564+
# way and we can test it.
565+
if v_be == v_le:
566+
self.assertEqual(expect_s, fromnativebytes(v_be, n, -1, 1),
567+
f"PyLong_FromNativeBytes(buffer, {n}, <native>)")
568+
self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0),
569+
f"PyLong_FromUnsignedNativeBytes(buffer, {n}, <native>)")
570+
426571

427572
if __name__ == "__main__":
428573
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Adds :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and
2+
:c:func:`PyLong_FromUnsignedNativeBytes` functions.

Modules/_io/textio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2393,7 +2393,7 @@ textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
23932393
return -1;
23942394

23952395
if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2396-
PY_LITTLE_ENDIAN, 0) < 0) {
2396+
PY_LITTLE_ENDIAN, 0, 1) < 0) {
23972397
Py_DECREF(cookieLong);
23982398
return -1;
23992399
}

Modules/_pickle.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2162,7 +2162,8 @@ save_long(PicklerObject *self, PyObject *obj)
21622162
pdata = (unsigned char *)PyBytes_AS_STRING(repr);
21632163
i = _PyLong_AsByteArray((PyLongObject *)obj,
21642164
pdata, nbytes,
2165-
1 /* little endian */ , 1 /* signed */ );
2165+
1 /* little endian */ , 1 /* signed */ ,
2166+
1 /* with exceptions */);
21662167
if (i < 0)
21672168
goto error;
21682169
/* If the int is negative, this may be a byte more than

Modules/_randommodule.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,8 @@ random_seed(RandomObject *self, PyObject *arg)
342342
res = _PyLong_AsByteArray((PyLongObject *)n,
343343
(unsigned char *)key, keyused * 4,
344344
PY_LITTLE_ENDIAN,
345-
0); /* unsigned */
345+
0, /* unsigned */
346+
1); /* with exceptions */
346347
if (res == -1) {
347348
goto Done;
348349
}

Modules/_sqlite/util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ _pysqlite_long_as_int64(PyObject * py_val)
162162
sqlite_int64 int64val;
163163
if (_PyLong_AsByteArray((PyLongObject *)py_val,
164164
(unsigned char *)&int64val, sizeof(int64val),
165-
IS_LITTLE_ENDIAN, 1 /* signed */) >= 0) {
165+
IS_LITTLE_ENDIAN, 1 /* signed */, 0) >= 0) {
166166
return int64val;
167167
}
168168
}

0 commit comments

Comments
 (0)