From 4216b2b11903f3adab718d4d3ff2df86bec76b6f Mon Sep 17 00:00:00 2001 From: Eli Libman Date: Sun, 22 May 2022 13:51:51 -0700 Subject: [PATCH 1/2] gh-93065: Fix HAMT to iterate correctly over 7-level deep trees. Also while there, clarify a few things about why we reduce the hash to 32 bits. Co-authored-by: Yury Selivanov --- Include/internal/pycore_hamt.h | 14 +++++++- Lib/test/test_context.py | 35 +++++++++++++++++++ Misc/ACKS | 1 + ...2-05-21-23-21-37.gh-issue-93065.5I18WC.rst | 5 +++ Python/hamt.c | 14 ++++++-- 5 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst diff --git a/Include/internal/pycore_hamt.h b/Include/internal/pycore_hamt.h index 85e35c5afc90cf..4d64288bbab494 100644 --- a/Include/internal/pycore_hamt.h +++ b/Include/internal/pycore_hamt.h @@ -5,7 +5,19 @@ # error "this header requires Py_BUILD_CORE define" #endif -#define _Py_HAMT_MAX_TREE_DEPTH 7 + +/* +HAMT tree is shaped by hashes of keys. Every group of 5 bits of a hash denotes +the exact position of the key in one level of the tree. Since we're using +32 bit hashes, we can have at most 7 such levels. Although if there are +two distinct keys with equal hashes, they will have to occupy the same +cell in the 7th level of the tree -- so we'd put them in a "collision" node. +Which brings the total possible tree depth to 8. Read more about the actual +layout of the HAMT tree in `hamt.c`. + +This constant is used to define a datastucture for storing iteration state. +*/ +#define _Py_HAMT_MAX_TREE_DEPTH 8 extern PyTypeObject _PyHamt_Type; diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index 3132cea668cb1b..b1aece4f5c9c49 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -535,6 +535,41 @@ def test_hamt_collision_1(self): self.assertEqual(len(h4), 2) self.assertEqual(len(h5), 3) + def test_hamt_collision_3(self): + # Test that iteration works with the deepest tree possible. + # https://github.com/python/cpython/issues/93065 + + C = HashKey(0b10000000_00000000_00000000_00000000, 'C') + D = HashKey(0b10000000_00000000_00000000_00000000, 'D') + + E = HashKey(0b00000000_00000000_00000000_00000000, 'E') + + h = hamt() + h = h.set(C, 'C') + h = h.set(D, 'D') + h = h.set(E, 'E') + + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=4 count=2 bitmap=0b101): + # : 'E' + # NULL: + # CollisionNode(size=4 id=0x107a24520): + # : 'C' + # : 'D' + + self.assertEqual({k.name for k in h.keys()}, {'C', 'D', 'E'}) + def test_hamt_stress(self): COLLECTION_SIZE = 7000 TEST_ITERS_EVERY = 647 diff --git a/Misc/ACKS b/Misc/ACKS index f3d8924ea62af5..d0e18303434f22 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1062,6 +1062,7 @@ Robert Li Xuanji Li Zekun Li Zheao Li +Eli Libman Dan Lidral-Porter Robert van Liere Ross Light diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst new file mode 100644 index 00000000000000..e7b3a34c3f449f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst @@ -0,0 +1,5 @@ +Fix contextvars HAMT implementation to handle iteration over deep trees. + +The bug was discovered and fixed by Eli Libman. See [1] for more details. + +[1] https://github.com/MagicStack/immutables/issues/84 diff --git a/Python/hamt.c b/Python/hamt.c index c3cb4e6fda4500..908c2531870314 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -409,14 +409,22 @@ hamt_hash(PyObject *o) return -1; } - /* While it's suboptimal to reduce Python's 64 bit hash to + /* While it's somewhat suboptimal to reduce Python's 64 bit hash to 32 bits via XOR, it seems that the resulting hash function is good enough (this is also how Long type is hashed in Java.) Storing 10, 100, 1000 Python strings results in a relatively shallow and uniform tree structure. - Please don't change this hashing algorithm, as there are many - tests that test some exact tree shape to cover all code paths. + Also it's worth noting that it would be possible to adapt the tree + structure to 64 bit hashes, but that would increase memory pressure + and provide little to no performance benefits for collections with + fewer than billions of key/value pairs. + + Important: do not change this hash reducing function. There are many + tests that need an exact tree shape to cover all code paths and + we do that by specifying concrete values for test data's `__hash__`. + If this function is changed most of the regression tests would + become useless. */ int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32); return xored == -1 ? -2 : xored; From fa004e3fe21fb7ca231fa6c3aed556165d90558d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 23 May 2022 20:19:41 +0200 Subject: [PATCH 2/2] Change NEWS entry link to an inline link. --- .../2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst index e7b3a34c3f449f..ea801653f75025 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2022-05-21-23-21-37.gh-issue-93065.5I18WC.rst @@ -1,5 +1,5 @@ Fix contextvars HAMT implementation to handle iteration over deep trees. -The bug was discovered and fixed by Eli Libman. See [1] for more details. - -[1] https://github.com/MagicStack/immutables/issues/84 +The bug was discovered and fixed by Eli Libman. See +`MagicStack/immutables#84 `_ +for more details.