diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py
index d168fcca2..8596cf8ec 100644
--- a/cloudpickle/cloudpickle.py
+++ b/cloudpickle/cloudpickle.py
@@ -180,6 +180,32 @@ def _builtin_type(name):
     return getattr(types, name)
 
 
+def _make__new__factory(type_):
+    def _factory():
+        return type_.__new__
+    return _factory
+
+
+# NOTE: These need to be module globals so that they're pickleable as globals.
+_get_dict_new = _make__new__factory(dict)
+_get_frozenset_new = _make__new__factory(frozenset)
+_get_list_new = _make__new__factory(list)
+_get_set_new = _make__new__factory(set)
+_get_tuple_new = _make__new__factory(tuple)
+_get_object_new = _make__new__factory(object)
+
+# Pre-defined set of builtin_function_or_method instances that can be
+# serialized.
+_BUILTIN_TYPE_CONSTRUCTORS = {
+    dict.__new__: _get_dict_new,
+    frozenset.__new__: _get_frozenset_new,
+    set.__new__: _get_set_new,
+    list.__new__: _get_list_new,
+    tuple.__new__: _get_tuple_new,
+    object.__new__: _get_object_new,
+}
+
+
 if sys.version_info < (3, 4):
     def _walk_global_ops(code):
         """
@@ -306,6 +332,18 @@ def save_function(self, obj, name=None):
         Determines what kind of function obj is (e.g. lambda, defined at
         interactive prompt, etc) and handles the pickling appropriately.
         """
+        if obj in _BUILTIN_TYPE_CONSTRUCTORS:
+            # We keep a special-cased cache of built-in type constructors at
+            # global scope, because these functions are structured very
+            # differently in different python versions and implementations (for
+            # example, they're instances of types.BuiltinFunctionType in
+            # CPython, but they're ordinary types.FunctionType instances in
+            # PyPy).
+            #
+            # If the function we've received is in that cache, we just
+            # serialize it as a lookup into the cache.
+            return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj)
+
         write = self.write
 
         if name is None:
@@ -332,7 +370,7 @@ def save_function(self, obj, name=None):
                 return self.save_global(obj, name)
 
         # a builtin_function_or_method which comes in as an attribute of some
-        # object (e.g., object.__new__, itertools.chain.from_iterable) will end
+        # object (e.g., itertools.chain.from_iterable) will end
         # up with modname "__main__" and so end up here. But these functions
         # have no __code__ attribute in CPython, so the handling for
         # user-defined functions below will fail.
@@ -408,15 +446,18 @@ def save_dynamic_class(self, obj):
         from global modules.
         """
         clsdict = dict(obj.__dict__)  # copy dict proxy to a dict
-        if not isinstance(clsdict.get('__dict__', None), property):
-            # don't extract dict that are properties
-            clsdict.pop('__dict__', None)
-            clsdict.pop('__weakref__', None)
+        clsdict.pop('__weakref__', None)
 
-        # hack as __new__ is stored differently in the __dict__
-        new_override = clsdict.get('__new__', None)
-        if new_override:
-            clsdict['__new__'] = obj.__new__
+        # On PyPy, __doc__ is a readonly attribute, so we need to include it in
+        # the initial skeleton class. This is safe because we know that the
+        # doc can't participate in a cycle with the original class.
+        type_kwargs = {'__doc__': clsdict.pop('__doc__', None)}
+
+        # If type overrides __dict__ as a property, include it in the type kwargs.
+        # In Python 2, we can't set this attribute after construction.
+        __dict__ = clsdict.pop('__dict__', None)
+        if isinstance(__dict__, property):
+            type_kwargs['__dict__'] = __dict__
 
         save = self.save
         write = self.write
@@ -439,17 +480,12 @@ def save_dynamic_class(self, obj):
         # Mark the start of the args for the rehydration function.
         write(pickle.MARK)
 
-        # On PyPy, __doc__ is a readonly attribute, so we need to include it in
-        # the initial skeleton class. This is safe because we know that the
-        # doc can't participate in a cycle with the original class.
-        doc_dict = {'__doc__': clsdict.pop('__doc__', None)}
-
         # Create and memoize an empty class with obj's name and bases.
         save(type(obj))
         save((
             obj.__name__,
             obj.__bases__,
-            doc_dict,
+            type_kwargs,
         ))
         write(pickle.REDUCE)
         self.memoize(obj)
diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py
index db3ce57dc..06d0460a1 100644
--- a/tests/cloudpickle_test.py
+++ b/tests/cloudpickle_test.py
@@ -1,7 +1,7 @@
 from __future__ import division
 
 import abc
-
+import collections
 import base64
 import functools
 import imp
@@ -701,5 +701,27 @@ def test_function_module_name(self):
         func = lambda x: x
         self.assertEqual(pickle_depickle(func).__module__, func.__module__)
 
+    def test_namedtuple(self):
+
+        MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c'])
+        t = MyTuple(1, 2, 3)
+
+        depickled_t, depickled_MyTuple = pickle_depickle([t, MyTuple])
+        self.assertTrue(isinstance(depickled_t, depickled_MyTuple))
+
+        self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), (1, 2, 3))
+        self.assertEqual((depickled_t[0], depickled_t[1], depickled_t[2]), (1, 2, 3))
+
+        self.assertEqual(depickled_MyTuple.__name__, 'MyTuple')
+        self.assertTrue(issubclass(depickled_MyTuple, tuple))
+
+    def test_builtin_type__new__(self):
+        # Functions occasionally take the __new__ of these types as default
+        # parameters for factories. For example, on Python 3.3,
+        # `tuple.__new__` is a default value for some methods of namedtuple.
+        for t in list, tuple, set, frozenset, dict, object:
+            self.assertTrue(pickle_depickle(t.__new__) is t.__new__)
+
+
 if __name__ == '__main__':
     unittest.main()
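
Note on the _BUILTIN_TYPE_CONSTRUCTORS change: the patch reduces a builtin __new__ to a call of a module-level factory instead of trying to save it as a global. The snippet below is a standalone sketch of the invariant this relies on, not cloudpickle code; the name _get_tuple_new merely mirrors the factory defined in the patch. A zero-argument, module-level factory pickles by reference with the stock pickler, and calling it after the round trip returns the identical builtin constructor, which is exactly what save_reduce(factory, ()) asks the unpickler to do.

    import pickle


    def _get_tuple_new():
        # Stand-in for the module-level factory registered in the patch.
        return tuple.__new__


    # The factory itself round-trips as a plain global lookup...
    factory = pickle.loads(pickle.dumps(_get_tuple_new))
    assert factory is _get_tuple_new

    # ...and re-running it hands back the exact builtin constructor, which is
    # the effect of save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj).
    assert factory() is tuple.__new__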
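
For context on what test_namedtuple exercises end to end, here is a usage sketch assuming a cloudpickle build that includes this patch, using the standard cloudpickle.dumps / pickle.loads pairing. A namedtuple class created inside a function cannot be pickled as a module-level global, so it goes through save_dynamic_class, and the builtin-__new__ cache keeps the generated class machinery from tripping up the pickler.

    import collections
    import pickle

    import cloudpickle


    def make_point():
        # Defined inside a function, so the class is not importable by name.
        return collections.namedtuple('Point', ['x', 'y'])


    Point = make_point()
    payload = cloudpickle.dumps([Point(1, 2), Point])
    point, point_cls = pickle.loads(payload)

    assert isinstance(point, point_cls)
    assert (point.x, point.y) == (1, 2)
    assert point_cls.__name__ == 'Point' and issubclass(point_cls, tuple)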
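
The save_dynamic_class change moves __doc__ (and a property-valued __dict__) into the arguments of the initial type(...) call rather than assigning them after the skeleton class exists. Below is a minimal standalone sketch of that two-step scheme; the helper names _make_skeleton and _fill_in are hypothetical and not part of the patch.

    def _make_skeleton(name, bases, type_kwargs):
        # Step 1: build an empty class from name, bases and type_kwargs only.
        # __doc__ has to be here because PyPy will not let it be set afterwards.
        return type(name, bases, type_kwargs)


    def _fill_in(cls, clsdict):
        # Step 2: restore the remaining class attributes on the skeleton.
        for attr, value in clsdict.items():
            setattr(cls, attr, value)
        return cls


    Example = _make_skeleton('Example', (object,), {'__doc__': 'kept from the start'})
    _fill_in(Example, {'answer': 42})
    assert Example.__doc__ == 'kept from the start'
    assert Example.answer == 42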