From 40fb0cb1442074bdb61d6e79b79a11a45bc77808 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 13:11:21 -0500 Subject: [PATCH 1/8] Add pickling of dict_keys --- cloudpickle/cloudpickle.py | 10 ++++++++++ tests/cloudpickle_test.py | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 3cfc7c974..24cd9c74d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -42,6 +42,7 @@ """ from __future__ import print_function +import _collections_abc import abc import builtins import dis @@ -981,6 +982,11 @@ def save_root_logger(self, obj): dispatch[logging.RootLogger] = save_root_logger + def save_dict_keys(self, obj): + self.save_reduce(_make_dict_keys, (list(obj),)) + + dispatch[_collections_abc.dict_keys] = save_dict_keys + if hasattr(types, "MappingProxyType"): # pragma: no branch def save_mappingproxy(self, obj): self.save_reduce(types.MappingProxyType, (dict(obj),), obj=obj) @@ -1362,3 +1368,7 @@ def _get_bases(typ): # For regular class objects bases_attr = '__bases__' return getattr(typ, bases_attr) + + +def _make_dict_keys(obj): + return dict.fromkeys(obj).keys() diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b1821dba6..b3780389e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -207,6 +207,10 @@ def test_memoryview(self): self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), buffer_obj.tobytes()) + def test_dict_keys(self): + keys = {"a": 1, "b": 2}.keys() + self.assertEqual(pickle_depickle(keys), keys) + def test_sliced_and_non_contiguous_memoryview(self): buffer_obj = memoryview(b"Hello!" * 3)[2:15:2] self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), From 9d232034ecdcf41689eafe78820318ab7461d9ef Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 13:28:44 -0500 Subject: [PATCH 2/8] Add pickling of dict_values, dict_items --- cloudpickle/cloudpickle.py | 18 ++++++++++++++++++ tests/cloudpickle_test.py | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 24cd9c74d..8ec4a3797 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -987,6 +987,16 @@ def save_dict_keys(self, obj): dispatch[_collections_abc.dict_keys] = save_dict_keys + def save_dict_values(self, obj): + self.save_reduce(_make_dict_values, (list(obj), )) + + dispatch[_collections_abc.dict_values] = save_dict_values + + def save_dict_items(self, obj): + self.save_reduce(_make_dict_items, (list(obj), )) + + dispatch[_collections_abc.dict_items] = save_dict_items + if hasattr(types, "MappingProxyType"): # pragma: no branch def save_mappingproxy(self, obj): self.save_reduce(types.MappingProxyType, (dict(obj),), obj=obj) @@ -1372,3 +1382,11 @@ def _get_bases(typ): def _make_dict_keys(obj): return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj): + return {_.__hash__: _ for _ in obj}.values() + + +def _make_dict_items(obj): + return dict(obj).items() diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b3780389e..92c36ae75 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -211,6 +211,14 @@ def test_dict_keys(self): keys = {"a": 1, "b": 2}.keys() self.assertEqual(pickle_depickle(keys), keys) + def test_dict_values(self): + values = {"a": 1, "b": 2}.values() + self.assertEqual(list(pickle_depickle(values)), list(values)) + + def test_dict_items(self): + items = {"a": 1, "b": 2}.items() + self.assertEqual(pickle_depickle(items), items) + def test_sliced_and_non_contiguous_memoryview(self): buffer_obj = memoryview(b"Hello!" * 3)[2:15:2] self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), From 16fa5e8b1bace95e1b7054a5d1d08abf5e7a072b Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 14:06:36 -0500 Subject: [PATCH 3/8] Add reducers for dict views to cloudpickle_fast --- cloudpickle/cloudpickle_fast.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index fb9ab9b3d..ee3a6a149 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -10,6 +10,7 @@ guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ +import _collections_abc import abc import copyreg import io @@ -30,6 +31,7 @@ _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, + _make_dict_keys, _make_dict_values, _make_dict_items, ) load, loads = _pickle.load, _pickle.loads @@ -339,6 +341,18 @@ def _class_reduce(obj): return NotImplemented +def _dict_keys_reduce(obj): + return _make_dict_keys, (list(obj),) + + +def _dict_values_reduce(obj): + return _make_dict_values, (list(obj),) + + +def _dict_items_reduce(obj): + return _make_dict_items, (list(obj),) + + # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and @@ -425,6 +439,9 @@ class CloudPickler(Pickler): dispatch[types.MappingProxyType] = _mappingproxy_reduce dispatch[weakref.WeakSet] = _weakset_reduce dispatch[typing.TypeVar] = _typevar_reduce + dispatch[_collections_abc.dict_keys] = _dict_keys_reduce + dispatch[_collections_abc.dict_values] = _dict_values_reduce + dispatch[_collections_abc.dict_items] = _dict_items_reduce def __init__(self, file, protocol=None, buffer_callback=None): if protocol is None: From d5a74eca89083dcd12f7eec946a4aea857974ba8 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 14:11:18 -0500 Subject: [PATCH 4/8] Compare sorted values --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 92c36ae75..eadb694ce 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -213,7 +213,7 @@ def test_dict_keys(self): def test_dict_values(self): values = {"a": 1, "b": 2}.values() - self.assertEqual(list(pickle_depickle(values)), list(values)) + self.assertEqual(sorted(pickle_depickle(values)), sorted(values)) def test_dict_items(self): items = {"a": 1, "b": 2}.items() From 7d383550e9a9ad5135c0d470e294718bf837aa60 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 19:46:53 -0500 Subject: [PATCH 5/8] Test types for dict views after depickle --- tests/cloudpickle_test.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index eadb694ce..afc6f2031 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1,5 +1,6 @@ from __future__ import division +import _collections_abc import abc import collections import base64 @@ -209,15 +210,21 @@ def test_memoryview(self): def test_dict_keys(self): keys = {"a": 1, "b": 2}.keys() - self.assertEqual(pickle_depickle(keys), keys) + results = pickle_depickle(keys) + self.assertEqual(results, keys) + assert isinstance(results, _collections_abc.dict_keys) def test_dict_values(self): values = {"a": 1, "b": 2}.values() - self.assertEqual(sorted(pickle_depickle(values)), sorted(values)) + results = pickle_depickle(values) + self.assertEqual(sorted(results), sorted(values)) + assert isinstance(results, _collections_abc.dict_values) def test_dict_items(self): items = {"a": 1, "b": 2}.items() - self.assertEqual(pickle_depickle(items), items) + results = pickle_depickle(items) + self.assertEqual(results, items) + assert isinstance(results, _collections_abc.dict_items) def test_sliced_and_non_contiguous_memoryview(self): buffer_obj = memoryview(b"Hello!" * 3)[2:15:2] From 4e45b1bb88958baa837a4dd66960ba09be2ef1c8 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 20:05:02 -0500 Subject: [PATCH 6/8] Catch RuntimeError for numpy import --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index afc6f2031..a6f16355b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -33,7 +33,7 @@ # tests should be skipped if these modules are not available import numpy as np import scipy.special as spp -except ImportError: +except (ImportError, RuntimeError): np = None spp = None From 0742b53ad07b81d34f9920458c07c425bef43c82 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Sun, 21 Jun 2020 20:10:18 -0500 Subject: [PATCH 7/8] Minor change to _make_dict_values --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8ec4a3797..37b56f9dc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1385,7 +1385,7 @@ def _make_dict_keys(obj): def _make_dict_values(obj): - return {_.__hash__: _ for _ in obj}.values() + return {i: _ for i, _ in enumerate(obj)}.values() def _make_dict_items(obj): From 8eb737cf8b2695e4ba5469e42ffb715bae2c4f10 Mon Sep 17 00:00:00 2001 From: Brian Larsen Date: Mon, 29 Jun 2020 10:02:52 -0500 Subject: [PATCH 8/8] Update dict_items handling, add some comments. --- cloudpickle/cloudpickle.py | 12 +++++++++--- cloudpickle/cloudpickle_fast.py | 8 +++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 37b56f9dc..b74a9f4c8 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -983,17 +983,23 @@ def save_root_logger(self, obj): dispatch[logging.RootLogger] = save_root_logger def save_dict_keys(self, obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information self.save_reduce(_make_dict_keys, (list(obj),)) dispatch[_collections_abc.dict_keys] = save_dict_keys def save_dict_values(self, obj): - self.save_reduce(_make_dict_values, (list(obj), )) + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + self.save_reduce(_make_dict_values, (list(obj),)) dispatch[_collections_abc.dict_values] = save_dict_values def save_dict_items(self, obj): - self.save_reduce(_make_dict_items, (list(obj), )) + self.save_reduce(_make_dict_items, (dict(obj),)) dispatch[_collections_abc.dict_items] = save_dict_items @@ -1389,4 +1395,4 @@ def _make_dict_values(obj): def _make_dict_items(obj): - return dict(obj).items() + return obj.items() diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index ee3a6a149..406d40858 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -342,15 +342,21 @@ def _class_reduce(obj): def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information return _make_dict_keys, (list(obj),) def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information return _make_dict_values, (list(obj),) def _dict_items_reduce(obj): - return _make_dict_items, (list(obj),) + return _make_dict_items, (dict(obj),) # COLLECTIONS OF OBJECTS STATE SETTERS