diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d30e8b2..e144e6fa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
 DeepDiff Change log

+- v5-2-2: Fixed Delta serialization when None type is present.
 - v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType.
 - v5-0-2: Bug Fix NoneType in ignore type groups https://github.com/seperman/deepdiff/issues/207
 - v5-0-1: Bug fix to not apply format to non numbers.
diff --git a/README.md b/README.md
index 1896931b..41ff975e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# DeepDiff v 5.2.1
+# DeepDiff v 5.2.2

 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat)
 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat)
@@ -18,7 +18,7 @@ Tested on Python 3.6+ and PyPy3.

 **NOTE: The last version of DeepDiff to work on Python 3.5 was DeepDiff 5-0-2**

-- [Documentation](https://zepworks.com/deepdiff/5.2.1/)
+- [Documentation](https://zepworks.com/deepdiff/5.2.2/)

 ## Installation
@@ -54,13 +54,13 @@ Note: if you want to use DeepDiff via commandline, make sure to run `pip install

 DeepDiff gets the difference of 2 objects.

-> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.1/diff.html)
-> - The full documentation of all modules can be found on <https://zepworks.com/deepdiff/5.2.1/>
+> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.2/diff.html)
+> - The full documentation of all modules can be found on <https://zepworks.com/deepdiff/5.2.2/>
 > - Tutorials and posts about DeepDiff can be found on

 ## A few Examples

-> Note: This is just a brief overview of what DeepDiff can do. Please visit <https://zepworks.com/deepdiff/5.2.1/> for full documentation.
+> Note: This is just a brief overview of what DeepDiff can do. Please visit <https://zepworks.com/deepdiff/5.2.2/> for full documentation.

 ### List difference ignoring order or duplicates
@@ -264,8 +264,8 @@ Example:
 ```

-> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.1/diff.html)
-> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.1/>
+> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.2/diff.html)
+> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.2/>

 # Deep Search
@@ -297,8 +297,8 @@ And you can pass all the same kwargs as DeepSearch to grep too:
 {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}}
 ```

-> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.2.1/dsearch.html)
-> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.1/>
+> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.2.2/dsearch.html)
+> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.2/>

 # Deep Hash
 (New in v4-0-0)
@@ -306,8 +306,8 @@ And you can pass all the same kwargs as DeepSearch to grep too:
 DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable!
 DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash.

-> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.1/deephash.html)
-> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.1/>
+> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.2/deephash.html)
+> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.2/>

 Let's say you have a dictionary object.
@@ -355,8 +355,8 @@ Which you can write as:

 At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too.

-> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.1/deephash.html)
-> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.1/>
+> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.2/deephash.html)
+> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.2/>

 # Using DeepDiff in unit tests
diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py
index ea96d35a..c152df3f 100644
--- a/deepdiff/__init__.py
+++ b/deepdiff/__init__.py
@@ -1,6 +1,6 @@
 """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes."""
 # flake8: noqa
-__version__ = '5.2.1'
+__version__ = '5.2.2'
 import logging

 if __name__ == '__main__':
diff --git a/deepdiff/commands.py b/deepdiff/commands.py
index a49e3482..86daee40 100644
--- a/deepdiff/commands.py
+++ b/deepdiff/commands.py
@@ -48,6 +48,7 @@ def cli():
 @click.option('--significant-digits', required=False, default=None, type=int, show_default=True)
 @click.option('--truncate-datetime', required=False, type=click.Choice(['second', 'minute', 'hour', 'day'], case_sensitive=True), show_default=True, default=None)
 @click.option('--verbose-level', required=False, default=1, type=click.IntRange(0, 2), show_default=True)
+@click.option('--debug', is_flag=True, show_default=False)
 def diff(
     *args, **kwargs
 ):
@@ -59,6 +60,7 @@ def diff(

     T1 and T2 are the path to the files to be compared with each other.
     """
+    debug = kwargs.pop('debug')
     kwargs['ignore_private_variables'] = not kwargs.pop('include_private_variables')
     kwargs['progress_logger'] = logger.info if kwargs['progress_logger'] == 'info' else logger.error
     create_patch = kwargs.pop('create_patch')
@@ -71,7 +73,10 @@ def diff(
         try:
             kwargs[name] = load_path_content(t_path, file_type=t_extension)
         except Exception as e:  # pragma: no cover.
-            sys.exit(str(f"Error when loading {name}: {e}"))  # pragma: no cover.
+            if debug:  # pragma: no cover.
+                raise  # pragma: no cover.
+            else:  # pragma: no cover.
+                sys.exit(str(f"Error when loading {name}: {e}"))  # pragma: no cover.

     # if (t1_extension != t2_extension):
     if t1_extension in {'csv', 'tsv'}:
@@ -92,7 +97,10 @@ def diff(
         try:
             delta = Delta(diff)
         except Exception as e:  # pragma: no cover.
-            sys.exit(f"Error when loading the patch (aka delta): {e}")  # pragma: no cover.
+            if debug:  # pragma: no cover.
+                raise  # pragma: no cover.
+            else:  # pragma: no cover.
+                sys.exit(f"Error when loading the patch (aka delta): {e}")  # pragma: no cover.

         # printing into stdout
         sys.stdout.buffer.write(delta.dumps())
@@ -105,8 +113,9 @@ def diff(
 @click.argument('delta_path', type=click.Path(exists=True, resolve_path=True))
 @click.option('--backup', '-b', is_flag=True, show_default=True)
 @click.option('--raise-errors', is_flag=True, show_default=True)
+@click.option('--debug', is_flag=True, show_default=False)
 def patch(
-    path, delta_path, backup, raise_errors
+    path, delta_path, backup, raise_errors, debug
 ):
     """
     Deep Patch Commandline
@@ -123,7 +132,10 @@ def patch(
     try:
         delta = Delta(delta_path=delta_path, raise_errors=raise_errors)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when loading the patch (aka delta) {delta_path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when loading the patch (aka delta) {delta_path}: {e}"))  # pragma: no cover.
     extension = path.split('.')[-1]
@@ -137,7 +149,10 @@ def patch(
     try:
         save_content_to_path(result, path, file_type=extension, keep_backup=backup)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when saving {path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when saving {path}: {e}"))  # pragma: no cover.


 @cli.command()
@@ -148,7 +163,8 @@ def patch(
 @click.option('--exclude-paths', required=False, type=str, show_default=False, multiple=True)
 @click.option('--exclude-regex-paths', required=False, type=str, show_default=False, multiple=True)
 @click.option('--verbose-level', required=False, default=1, type=click.IntRange(0, 2), show_default=True)
-def grep(item, path, **kwargs):
+@click.option('--debug', is_flag=True, show_default=False)
+def grep(item, path, debug, **kwargs):
     """
     Deep Grep Commandline
@@ -162,19 +178,26 @@ def grep(item, path, **kwargs):
     try:
         content = load_path_content(path)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when loading {path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when loading {path}: {e}"))  # pragma: no cover.

     try:
         result = DeepSearch(content, item, **kwargs)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when running deep search on {path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when running deep search on {path}: {e}"))  # pragma: no cover.
     pprint(result, indent=2)


 @cli.command()
 @click.argument('path_inside', required=True, type=str)
 @click.argument('path', type=click.Path(exists=True, resolve_path=True))
-def extract(path_inside, path):
+@click.option('--debug', is_flag=True, show_default=False)
+def extract(path_inside, path, debug):
     """
     Deep Extract Commandline
@@ -185,10 +208,16 @@ def extract(path_inside, path):
     try:
         content = load_path_content(path)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when loading {path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when loading {path}: {e}"))  # pragma: no cover.

     try:
         result = deep_extract(content, path_inside)
     except Exception as e:  # pragma: no cover.
-        sys.exit(str(f"Error when running deep search on {path}: {e}"))  # pragma: no cover.
+        if debug:  # pragma: no cover.
+            raise  # pragma: no cover.
+        else:  # pragma: no cover.
+            sys.exit(str(f"Error when running deep search on {path}: {e}"))  # pragma: no cover.
     pprint(result, indent=2)
diff --git a/deepdiff/delta.py b/deepdiff/delta.py
index adf16879..81e34e7c 100644
--- a/deepdiff/delta.py
+++ b/deepdiff/delta.py
@@ -69,6 +69,11 @@ def __init__(
         serializer=pickle_dump,
         verify_symmetry=False,
     ):
+        if 'safe_to_import' not in set(deserializer.__code__.co_varnames):
+            def _deserializer(obj, safe_to_import=None):
+                return deserializer(obj)
+        else:
+            _deserializer = deserializer

         if diff is not None:
             if isinstance(diff, DeepDiff):
@@ -76,17 +81,17 @@ def __init__(
             elif isinstance(diff, Mapping):
                 self.diff = diff
             elif isinstance(diff, strings):
-                self.diff = deserializer(diff, safe_to_import=safe_to_import)
+                self.diff = _deserializer(diff, safe_to_import=safe_to_import)
         elif delta_path:
             with open(delta_path, 'rb') as the_file:
                 content = the_file.read()
-            self.diff = deserializer(content, safe_to_import=safe_to_import)
+            self.diff = _deserializer(content, safe_to_import=safe_to_import)
         elif delta_file:
             try:
                 content = delta_file.read()
             except UnicodeDecodeError as e:
                 raise ValueError(BINIARY_MODE_NEEDED_MSG.format(e)) from None
-            self.diff = deserializer(content, safe_to_import=safe_to_import)
+            self.diff = _deserializer(content, safe_to_import=safe_to_import)
         else:
             raise ValueError(DELTA_AT_LEAST_ONE_ARG_NEEDED)
@@ -512,7 +517,16 @@ def dump(self, file):
         """
         Dump into file object
         """
-        file.write(self.dumps())
+        # Small optimization: Our internal pickle serializer can just take a file object
+        # and directly write to it. However if a user defined serializer is passed
+        # we want to make it compatible with the expectation that self.serializer(self.diff)
+        # will give the user the serialization and then it can be written to
+        # a file object when using the dump(file) function.
+        param_names_of_serializer = set(self.serializer.__code__.co_varnames)
+        if 'file_obj' in param_names_of_serializer:
+            self.serializer(self.diff, file_obj=file)
+        else:
+            file.write(self.dumps())

     def dumps(self):
         """
diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py
index 13b81be0..4096387d 100644
--- a/deepdiff/serialization.py
+++ b/deepdiff/serialization.py
@@ -39,6 +39,8 @@ class UnsupportedFormatErr(TypeError):
     pass


+NONE_TYPE = type(None)
+
 CSV_HEADER_MAX_CHUNK_SIZE = 2048  # The chunk needs to be big enough that covers a couple of rows of data.


@@ -254,10 +256,40 @@ def find_class(self, module, name):
         # Forbid everything else.
         raise ForbiddenModule(FORBIDDEN_MODULE_MSG.format(module_dot_class)) from None

+    def persistent_load(self, persistent_id):
+        if persistent_id == "<<NoneType>>":
+            return type(None)
+
+
+class _RestrictedPickler(pickle.Pickler):
+    def persistent_id(self, obj):
+        if obj is NONE_TYPE:  # NOQA
+            return "<<NoneType>>"
+        return None
+


-def pickle_dump(obj):
+def pickle_dump(obj, file_obj=None):
+    """
+    **pickle_dump**
+    Dumps the obj into pickled content.
+
+    **Parameters**
+
+    obj : Any python object
+
+    file_obj : (Optional) A file object to dump the contents into
+
+    **Returns**
+
+        If file_obj is passed the return value will be None. It will write the object's pickle contents into the file.
+        However if no file_obj is passed, then it will return the pickle serialization of the obj in the form of bytes.
+    """
+    file_obj_passed = bool(file_obj)
+    file_obj = file_obj or io.BytesIO()
     # We expect at least python 3.5 so protocol 4 is good.
-    return pickle.dumps(obj, protocol=4, fix_imports=False)
+    _RestrictedPickler(file_obj, protocol=4, fix_imports=False).dump(obj)
+    if not file_obj_passed:
+        return file_obj.getvalue()


 def pickle_load(content, safe_to_import=None):
@@ -406,8 +438,7 @@ def _save_content(content, path, file_type, keep_backup=True):
             content = toml.dump(content, the_file)
     elif file_type == 'pickle':
         with open(path, 'wb') as the_file:
-            content = pickle_dump(content)
-            the_file.write(content)
+            content = pickle_dump(content, file_obj=the_file)
     elif file_type in {'csv', 'tsv'}:
         if clevercsv is None:  # pragma: no cover.
             raise ImportError('CleverCSV needs to be installed.')  # pragma: no cover.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 96ac336d..61350720 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,7 @@ Changelog

 DeepDiff Changelog

+- v5-2-2: Fixed Delta serialization when None type is present.
 - v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType.
 - v5-0-2: Bug Fix NoneType in ignore type groups https://github.com/seperman/deepdiff/issues/207
 - v5-0-1: Bug fix to not apply format to non numbers.
diff --git a/docs/conf.py b/docs/conf.py
index dd2a5b3c..8a253f09 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -60,9 +60,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '5.2.1'
+version = '5.2.2'
 # The full version, including alpha/beta/rc tags.
-release = '5.2.1'
+release = '5.2.2'

 load_dotenv(override=True)
 DOC_VERSION = os.environ.get('DOC_VERSION', version)
diff --git a/docs/delta.rst b/docs/delta.rst
index c12f8841..862baeab 100644
--- a/docs/delta.rst
+++ b/docs/delta.rst
@@ -145,6 +145,45 @@ DeepDiff by default uses a restricted Python pickle function to deserialize the

 The user of Delta can decide to switch the serializer and deserializer to their custom ones. The serializer and deserializer parameters can be used exactly for that reason. The best way to come up with your own serializer and deserialier is to take a look at the `pickle_dump and pickle_load functions in the serializer module `_

+.. _delta_json_deserializer_label:
+
+Json Deserializer for Delta
+```````````````````````````
+
+If all you deal with are Json serializable objects, you can use json for serialization.
+
+>>> from deepdiff import DeepDiff, Delta
+>>> import json
+>>> t1 = {"a": 1}
+>>> t2 = {"a": 2}
+>>>
+>>> diff = DeepDiff(t1, t2)
+>>> delta = Delta(diff, serializer=json.dumps)
+>>> dump = delta.dumps()
+>>> dump
+'{"values_changed": {"root[\'a\']": {"new_value": 2}}}'
+>>> delta_reloaded = Delta(dump, deserializer=json.loads)
+>>> t2 == delta_reloaded + t1
+True
+
+
+.. note::
+
+    Json is very limited and easily you can get to deltas that are not json serializable. You will probably want to extend the Python's Json serializer to support your needs.
+
+    >>> t1 = {"a": 1}
+    >>> t2 = {"a": None}
+    >>> diff = DeepDiff(t1, t2)
+    >>> diff
+    {'type_changes': {"root['a']": {'old_type': <class 'int'>, 'new_type': <class 'NoneType'>, 'old_value': 1, 'new_value': None}}}
+    >>> Delta(diff, serializer=json.dumps)
+    <Delta: {'type_changes': {"root['a']": {'old_type': <class 'int'>, 'new_type': <class 'NoneType'>, 'new_v...}>
+    >>> delta = Delta(diff, serializer=json.dumps)
+    >>> dump = delta.dumps()
+    Traceback (most recent call last):
+      File "lib/python3.8/json/encoder.py", line 179, in default
+        raise TypeError(f'Object of type {o.__class__.__name__} '
+    TypeError: Object of type type is not JSON serializable

 .. _delta_serializer_label:
@@ -162,6 +201,7 @@ Delta by default uses Python's pickle to serialize and deserialize. While the un

 In fact only a few Python object types are allowed by default. The user of DeepDiff can pass additional types using the :ref:`delta_safe_to_import_label` to allow further object types that need to be allowed.

+
 .. _delta_mutate_label:

 Delta Mutate parameter
@@ -331,9 +371,27 @@ At the time of writing this document, this list consists of:

 If you want to pass any other argument to safe_to_import, you will need to put the full path to the type as it appears in the sys.modules

-For example let's say you have a package call mypackage and has a module called mymodule. If you check the sys.modules, the address to this module must be mypackage.mymodule. In order for Delta to be able to serialize this object, first of all it has to be `picklable `_. Then you can pass:
+For example let's say you have a package call mypackage and has a module called mymodule. If you check the sys.modules, the address to this module must be mypackage.mymodule. In order for Delta to be able to serialize this object via pickle, first of all it has to be `picklable `_.
+
+>>> diff = DeepDiff(t1, t2)
+>>> delta = Delta(diff)
+>>> dump = delta.dumps()
+
+The dump at this point is serialized via Pickle and can be written to disc if needed.
+
+Later when you want to load this dump, by default Delta will block you from importing anything that is NOT in deepdiff.serialization.SAFE_TO_IMPORT . In fact it will show you this error message when trying to load this dump:
+
+    deepdiff.serialization.ForbiddenModule: Module 'builtins.type' is forbidden. You need to explicitly pass it by passing a safe_to_import parameter
+
+In order to let Delta know that this specific module is safe to import, you will need to pass it to Delta during loading of this dump:
+
+>>> delta = Delta(dump, safe_to_import={'mypackage.mymodule'})
+
+.. note ::
+
+    If you pass a custom deserializer to Delta, DeepDiff will pass safe_to_import parameter to the custom deserializer if that deserializer takes safe_to_import as a parameter in its definition.
+    For example if you just use json.loads as deserializer, the safe_to_import items won't be passed to it since json.loads does not have such a parameter.

->>> delta = Delta(t1, t2, safe_to_import={'mypackage.mymodule'})

 .. _delta_verify_symmetry_label:
diff --git a/docs/index.rst b/docs/index.rst
index 387ed4d5..2a2e1cf5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -4,7 +4,7 @@
    contain the root `toctree` directive.


-DeepDiff 5.2.1 documentation!
+DeepDiff 5.2.2 documentation!
 =============================

 *****************
diff --git a/setup.cfg b/setup.cfg
index 77d966b0..cfdfe5f0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.2.1
+current_version = 5.2.2
 commit = True
 tag = True
 tag_name = {new_version}
diff --git a/setup.py b/setup.py
index b2a5b655..a120132f 100755
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
 if os.environ.get('USER', '') == 'vagrant':
     del os.link

-version = '5.2.1'
+version = '5.2.2'


 def get_reqs(filename):
diff --git a/tests/test_delta.py b/tests/test_delta.py
index af0b7529..e91a6463 100644
--- a/tests/test_delta.py
+++ b/tests/test_delta.py
@@ -1,5 +1,7 @@
 import pytest
 import os
+import io
+import json
 from decimal import Decimal
 from unittest import mock
 from deepdiff import Delta, DeepDiff
@@ -1268,3 +1270,28 @@ def test_ignore_order_but_not_report_repetition(self):
             Delta(DeepDiff(t1, t2, ignore_order=True))
         assert DELTA_IGNORE_ORDER_NEEDS_REPETITION_REPORT == str(excinfo.value)
+
+    def test_none_in_delta_object(self):
+        t1 = {"a": None}
+        t2 = {"a": 1}
+
+        dump = Delta(DeepDiff(t1, t2)).dumps()
+        delta = Delta(dump)
+        assert t2 == delta + t1
+
+    def test_delta_with_json_serializer(self):
+        t1 = {"a": 1}
+        t2 = {"a": 2}
+
+        diff = DeepDiff(t1, t2)
+        delta = Delta(diff, serializer=json.dumps)
+        dump = delta.dumps()
+        delta_reloaded = Delta(dump, deserializer=json.loads)
+        assert t2 == delta_reloaded + t1
+
+        the_file = io.StringIO()
+        delta.dump(the_file)
+        the_file.seek(0)
+
+        delta_reloaded_again = Delta(delta_file=the_file, deserializer=json.loads)
+        assert t2 == delta_reloaded_again + t1
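
The core of the 5.2.2 fix in deepdiff/serialization.py above is pickle's persistent_id / persistent_load hook pair: the NoneType class itself is swapped for an out-of-band sentinel string on the way out and mapped back on the way in, so the restricted unpickler never has to whitelist builtins.NoneType. Below is a minimal, self-contained sketch of that pattern under illustrative names (DemoPickler, DemoUnpickler, SENTINEL, dumps, loads are not part of DeepDiff's API); DeepDiff's actual implementation is the _RestrictedPickler / persistent_load code shown in the diff.

import io
import pickle

NONE_TYPE = type(None)
SENTINEL = "<<NoneType>>"  # illustrative marker string; any unique value works


class DemoPickler(pickle.Pickler):
    def persistent_id(self, obj):
        # Swap the NoneType class itself for an out-of-band marker;
        # returning None tells pickle to serialize the object normally.
        if obj is NONE_TYPE:
            return SENTINEL
        return None


class DemoUnpickler(pickle.Unpickler):
    def persistent_load(self, pid):
        # Map the marker back to the NoneType class while loading.
        if pid == SENTINEL:
            return NONE_TYPE
        raise pickle.UnpicklingError(f"unsupported persistent id: {pid!r}")


def dumps(obj) -> bytes:
    buffer = io.BytesIO()
    DemoPickler(buffer, protocol=4, fix_imports=False).dump(obj)
    return buffer.getvalue()


def loads(content: bytes):
    return DemoUnpickler(io.BytesIO(content)).load()


if __name__ == "__main__":
    # A payload shaped like a DeepDiff type_changes report, where the NoneType
    # class appears as a value (e.g. when a value changes from 1 to None).
    payload = {"type_changes": {"root['a']": {"old_type": int, "new_type": NONE_TYPE}}}
    restored = loads(dumps(payload))
    assert restored["type_changes"]["root['a']"]["new_type"] is NONE_TYPE
    print("NoneType round-tripped through pickle successfully")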
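
A second technique the deepdiff/delta.py hunk relies on is adapting a user-supplied serializer or deserializer by inspecting its __code__.co_varnames: if the callable does not declare safe_to_import (or file_obj), Delta changes how it calls it. The sketch below is a hedged, standalone illustration of that idea; adapt_deserializer is a hypothetical helper name, and json.loads merely stands in for any custom deserializer.

import json


def adapt_deserializer(deserializer):
    # __code__.co_varnames lists the callable's parameters (and locals), which is
    # how the delta.py hunk above checks for a 'safe_to_import' parameter.
    # Note: this assumes a pure-Python callable; C-implemented functions such as
    # pickle.loads have no __code__ attribute.
    if 'safe_to_import' not in set(deserializer.__code__.co_varnames):
        def _deserializer(obj, safe_to_import=None):
            # Drop the extra keyword instead of letting it raise TypeError.
            return deserializer(obj)
        return _deserializer
    return deserializer


loads = adapt_deserializer(json.loads)
print(loads('{"a": 1}', safe_to_import={'mypackage.mymodule'}))  # -> {'a': 1}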