From 409921b9210948be354b66b9984ed4ed7001d262 Mon Sep 17 00:00:00 2001 From: Dustin Torres Date: Mon, 12 Apr 2021 20:57:22 -0700 Subject: [PATCH 1/2] This allows for a custom compare function to compare iterable items. The new compare function takes two items of an iterable and should return True if matching, False if no match, and raise CannotCompare if unable to compare the two items. The default behavior is the same as before, which is comparing each item in order. If the compare function raises CannotCompare then behavior reverts to the default in-order comparison. This also introduces a new report key which is `iterable_item_moved` to track if iterable items have moved. --- deepdiff/delta.py | 26 +++++- deepdiff/diff.py | 131 +++++++++++++++++++++-------- deepdiff/helper.py | 7 ++ deepdiff/model.py | 31 +++++-- tests/test_ignore_order.py | 167 ++++++++++++++++++++++++++++++++++++- 5 files changed, 314 insertions(+), 48 deletions(-) diff --git a/deepdiff/delta.py b/deepdiff/delta.py index 9e6fd78c..077c6ab5 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -260,9 +260,14 @@ def _del_elem(self, parent, parent_to_obj_elem, parent_to_obj_action, value=obj, action=parent_to_obj_action) def _do_iterable_item_added(self): - iterable_item_added = self.diff.get('iterable_item_added') + iterable_item_added = self.diff.get('iterable_item_added', {}) + iterable_item_moved = self.diff.get('iterable_item_moved') + if iterable_item_moved: + added_dict = {v["new_path"]: v["new_value"] for k, v in iterable_item_moved.items()} + iterable_item_added.update(added_dict) + if iterable_item_added: - self._do_item_added(iterable_item_added) + self._do_item_added(iterable_item_added, insert=True) def _do_dictionary_item_added(self): dictionary_item_added = self.diff.get('dictionary_item_added') @@ -274,7 +279,7 @@ def _do_attribute_added(self): if attribute_added: self._do_item_added(attribute_added) - def _do_item_added(self, items, 
sort=True, insert=False): if sort: # sorting items by their path so that the items with smaller index # are applied first (unless `sort` is `False` so that order of @@ -289,6 +294,11 @@ def _do_item_added(self, items, sort=True): elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details else: continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198 + + # Insert is only true for iterables, make sure it is a valid index. + if(insert and elem < len(obj)): + obj.insert(elem, None) + self._set_new_value(parent, parent_to_obj_elem, parent_to_obj_action, obj, elements, path, elem, action, new_value) @@ -397,10 +407,18 @@ def _do_item_removed(self, items): self._do_verify_changes(path, expected_old_value, current_old_value) def _do_iterable_item_removed(self): - iterable_item_removed = self.diff.get('iterable_item_removed') + iterable_item_removed = self.diff.get('iterable_item_removed', {}) + + iterable_item_moved = self.diff.get('iterable_item_moved') + if iterable_item_moved: + # These will get added back during items_added + removed_dict = {k: v["new_value"] for k, v in iterable_item_moved.items()} + iterable_item_removed.update(removed_dict) + if iterable_item_removed: self._do_item_removed(iterable_item_removed) + def _do_dictionary_item_removed(self): dictionary_item_removed = self.diff.get('dictionary_item_removed') if dictionary_item_removed: diff --git a/deepdiff/diff.py b/deepdiff/diff.py index e3eb1e68..0d62252e 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -21,7 +21,7 @@ number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans, np_ndarray, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer, TEXT_VIEW, TREE_VIEW, DELTA_VIEW, - np, get_truncate_datetime, dict_) + np, get_truncate_datetime, dict_, CannotCompare) from deepdiff.serialization import SerializationMixin from deepdiff.distance import DistanceMixin from 
deepdiff.model import ( @@ -139,6 +139,7 @@ def __init__(self, truncate_datetime=None, verbose_level=1, view=TEXT_VIEW, + iterable_compare_func=None, _original_type=None, _parameters=None, _shared_parameters=None, @@ -154,7 +155,8 @@ def __init__(self, "view, hasher, hashes, max_passes, max_diffs, " "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, " "cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, " - "math_epsilon, _original_type, _parameters and _shared_parameters.") % ', '.join(kwargs.keys())) + "math_epsilon, iterable_compare_func, _original_type, " + "_parameters and _shared_parameters.") % ', '.join(kwargs.keys())) if _parameters: self.__dict__.update(_parameters) @@ -182,6 +184,7 @@ def __init__(self, self.ignore_string_case = ignore_string_case self.exclude_obj_callback = exclude_obj_callback self.number_to_string = number_to_string_func or number_to_string + self.iterable_compare_func = iterable_compare_func self.ignore_private_variables = ignore_private_variables self.ignore_nan_inequality = ignore_nan_inequality self.hasher = hasher @@ -558,6 +561,53 @@ def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None): else: self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type) + def _compare_in_order(self, level): + """ + Default compare if `iterable_compare_func` is not provided. + This will compare in sequence order. + """ + + return [((i, i), (x, y)) for i, (x, y) in enumerate( + zip_longest( + level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))] + + def _get_matching_pairs(self, level): + """ + Given a level get matching pairs. This returns list of two tuples in the form: + [ + (t1 index, t2 index), (t1 item, t2 item) + ] + + This will compare using the passed in `iterable_compare_func` if available. 
+ Default it to compare in order + """ + + if(self.iterable_compare_func is None): + # Match in order if there is no compare function provided + return self._compare_in_order(level) + try: + matches = [] + y_matched = set() + for i, x in enumerate(level.t1): + x_found = False + for j, y in enumerate(level.t2): + + if(self.iterable_compare_func(x, y)): + y_matched.add(id(y)) + matches.append(((i, j), (x, y))) + x_found = True + break + + if(not x_found): + matches.append(((i, -1), (x, ListItemRemovedOrAdded))) + for j, y in enumerate(level.t2): + if(id(y) not in y_matched): + matches.append(((-1, j), (ListItemRemovedOrAdded, y))) + return matches + except CannotCompare: + return self._compare_in_order(level) + + def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type=None): # We're handling both subscriptable and non-subscriptable iterables. Which one is it? subscriptable = self._iterables_subscriptable(level.t1, level.t2) @@ -566,42 +616,53 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type else: child_relationship_class = NonSubscriptableIterableRelationship - for i, (x, y) in enumerate( - zip_longest( - level.t1, level.t2, fillvalue=ListItemRemovedOrAdded)): - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. - if y is ListItemRemovedOrAdded: # item removed completely - change_level = level.branch_deeper( - x, - notpresent, - child_relationship_class=child_relationship_class, - child_relationship_param=i) - self._report_result('iterable_item_removed', change_level) + for (i, j), (x, y) in self._get_matching_pairs(level): + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. 
- elif x is ListItemRemovedOrAdded: # new item added - change_level = level.branch_deeper( - notpresent, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=i) - self._report_result('iterable_item_added', change_level) - - else: # check if item value has changed - item_id = id(x) - if parents_ids and item_id in parents_ids: - continue - parents_ids_added = add_to_frozen_set(parents_ids, item_id) - - # Go one level deeper - next_level = level.branch_deeper( - x, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=i) - self._diff(next_level, parents_ids_added) + if y is ListItemRemovedOrAdded: # item removed completely + change_level = level.branch_deeper( + x, + notpresent, + child_relationship_class=child_relationship_class, + child_relationship_param=i) + self._report_result('iterable_item_removed', change_level) + + elif x is ListItemRemovedOrAdded: # new item added + change_level = level.branch_deeper( + notpresent, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=j) + self._report_result('iterable_item_added', change_level) + + else: # check if item value has changed + + if (i != j): + # Item moved + change_level = level.branch_deeper( + x, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=i, + child_relationship_param2=j + ) + self._report_result('iterable_item_moved', change_level) + + item_id = id(x) + if parents_ids and item_id in parents_ids: + continue + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + + # Go one level deeper + next_level = level.branch_deeper( + x, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=i) + self._diff(next_level, parents_ids_added) def _diff_str(self, level): """Compare strings""" diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 531a30b4..8901ea3c 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -190,6 +190,13 @@ def 
__repr__(self): __str__ = __repr__ +class CannotCompare(Exception): + """ + Exception when two items cannot be compared in the compare function. + """ + pass + + unprocessed = Unprocessed() skipped = Skipped() not_hashed = NotHashed() diff --git a/deepdiff/model.py b/deepdiff/model.py index ce933271..88696ec0 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -16,6 +16,7 @@ "unprocessed", "iterable_item_added", "iterable_item_removed", + "iterable_item_moved", "attribute_added", "attribute_removed", "set_item_removed", @@ -100,6 +101,7 @@ def __init__(self, tree_results=None, verbose_level=1): "unprocessed": [], "iterable_item_added": dict_(), "iterable_item_removed": dict_(), + "iterable_item_moved": dict_(), "attribute_added": self.__set_or_dict(), "attribute_removed": self.__set_or_dict(), "set_item_removed": PrettyOrderedSet(), @@ -126,6 +128,7 @@ def _from_tree_results(self, tree): self._from_tree_unprocessed(tree) self._from_tree_default(tree, 'iterable_item_added') self._from_tree_default(tree, 'iterable_item_removed') + self._from_tree_iterable_item_moved(tree) self._from_tree_default(tree, 'attribute_added') self._from_tree_default(tree, 'attribute_removed') self._from_tree_set_item_removed(tree) @@ -187,6 +190,13 @@ def _from_tree_value_changed(self, tree): if 'diff' in change.additional: the_changed.update({'diff': change.additional['diff']}) + def _from_tree_iterable_item_moved(self, tree): + if 'iterable_item_moved' in tree: + for change in tree['iterable_item_moved']: + the_changed = {'new_path': change.path(use_t2=True), 'new_value': change.t2} + self['iterable_item_moved'][change.path( + force=FORCE_DEFAULT)] = the_changed + def _from_tree_unprocessed(self, tree): if 'unprocessed' in tree: for change in tree['unprocessed']: @@ -244,6 +254,7 @@ def __init__(self, tree_results=None, ignore_order=None): "values_changed": dict_(), "iterable_item_added": dict_(), "iterable_item_removed": dict_(), + "iterable_item_moved": dict_(), 
"attribute_added": dict_(), "attribute_removed": dict_(), "set_item_removed": dict_(), @@ -273,6 +284,7 @@ def _from_tree_results(self, tree): else: self._from_tree_default(tree, 'iterable_item_added') self._from_tree_default(tree, 'iterable_item_removed') + self._from_tree_iterable_item_moved(tree) self._from_tree_default(tree, 'attribute_added') self._from_tree_default(tree, 'attribute_removed') self._from_tree_set_item_removed(tree) @@ -528,7 +540,7 @@ def __setattr__(self, key, value): def repetition(self): return self.additional['repetition'] - def auto_generate_child_rel(self, klass, param): + def auto_generate_child_rel(self, klass, param, param2=None): """ Auto-populate self.child_rel1 and self.child_rel2. This requires self.down to be another valid DiffLevel object. @@ -542,7 +554,7 @@ def auto_generate_child_rel(self, klass, param): klass=klass, parent=self.t1, child=self.down.t1, param=param) if self.down.t2 is not notpresent: self.t2_child_rel = ChildRelationship.create( - klass=klass, parent=self.t2, child=self.down.t2, param=param) + klass=klass, parent=self.t2, child=self.down.t2, param=param if param2 is None else param2) @property def all_up(self): @@ -572,7 +584,7 @@ def all_down(self): def _format_result(root, result): return None if result is None else "{}{}".format(root, result) - def path(self, root="root", force=None, get_parent_too=False): + def path(self, root="root", force=None, get_parent_too=False, use_t2=False): """ A python syntax string describing how to descend to this level, assuming the top level object is called root. Returns None if the path is not representable as a string. @@ -594,7 +606,7 @@ def path(self, root="root", force=None, get_parent_too=False): This will pretend all iterables are subscriptable, for example. 
""" # TODO: We could optimize this by building on top of self.up's path if it is cached there - cache_key = "{}{}".format(force, get_parent_too) + cache_key = "{}{}{}".format(force, get_parent_too, use_t2) if cache_key in self._path: cached = self._path[cache_key] if get_parent_too: @@ -609,7 +621,10 @@ def path(self, root="root", force=None, get_parent_too=False): # traverse all levels of this relationship while level and level is not self: # get this level's relationship object - next_rel = level.t1_child_rel or level.t2_child_rel # next relationship object to get a formatted param from + if(use_t2): + next_rel = level.t2_child_rel + else: + next_rel = level.t1_child_rel or level.t2_child_rel # next relationship object to get a formatted param from # t1 and t2 both are empty if next_rel is None: @@ -642,6 +657,7 @@ def create_deeper(self, new_t2, child_relationship_class, child_relationship_param=None, + child_relationship_param2=None, report_type=None): """ Start a new comparison level and correctly link it to this one. 
@@ -653,7 +669,7 @@ def create_deeper(self, new_t1, new_t2, down=None, up=level, report_type=report_type) level.down = result level.auto_generate_child_rel( - klass=child_relationship_class, param=child_relationship_param) + klass=child_relationship_class, param=child_relationship_param, param2=child_relationship_param2) return result def branch_deeper(self, @@ -661,6 +677,7 @@ def branch_deeper(self, new_t2, child_relationship_class, child_relationship_param=None, + child_relationship_param2=None, report_type=None): """ Branch this comparison: Do not touch this comparison line, but create a new one with exactly the same content, @@ -670,7 +687,7 @@ def branch_deeper(self, """ branch = self.copy() return branch.create_deeper(new_t1, new_t2, child_relationship_class, - child_relationship_param, report_type) + child_relationship_param, child_relationship_param2, report_type) def copy(self): """ diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 748db3a7..14123b60 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -1,7 +1,7 @@ import pytest from unittest import mock -from deepdiff.helper import number_to_string -from deepdiff import DeepDiff +from deepdiff.helper import number_to_string, CannotCompare +from deepdiff import DeepDiff, Delta from decimal import Decimal from deepdiff.deephash import sha256hex from tests import CustomClass2 @@ -779,3 +779,166 @@ def test_ignore_order_and_group_by(self): diff2 = DeepDiff(t1, t2, group_by='id', ignore_order=True) expected2 = {'iterable_item_added': {"root['BB']['ate'][1]": 'Brownies'}} assert expected2 == diff2 + + def test_compare_func(self): + t1 = { + "Cars": [ + { + "id": "1", + "make": "Toyota", + "model": "Camry", + "dealers": [ + { + "id": 103, + "address": "103 Fake St", + "quantity": 50 + }, + { + "id": 105, + "address": "105 Fake St", + "quantity": 20 + } + ] + }, + { + "id": "2", + "make": "Toyota", + "model": "Highlander", + "dealers": [ + { + "id": 123, + 
"address": "123 Fake St", + "quantity": 50 + }, + { + "id": 125, + "address": "125 Fake St", + "quantity": 20 + } + ] + }, + { + "id": "3", + "make": "Toyota", + "model": "4Runner", + "model_numbers": [1, 2, 4] + }, + { + "id": "4", + "make": "Toyota", + "model": "supra", + "production": False + } + ] + } + + t2 = { + "Cars": [ + { + "id": "7", + "make": "Toyota", + "model": "8Runner" + }, + { + "id": "3", + "make": "Toyota", + "model": "4Runner", + "model_numbers": [1, 2, 3, 4] + }, + { + "id": "1", + "make": "Toyota", + "model": "Camry", + "dealers": [ + { + "id": 105, + "address": "105 Fake St", + "quantity": 50 + }, + { + "id": 200, + "address": "200 Fake St", + "quantity": 10 + } + ] + }, + { + "id": "4", + "make": "Toyota", + "model": "Supra", + "dealers": [ + { + "id": 123, + "address": "123 Fake St", + "quantity": 50 + }, + { + "id": 125, + "address": "125 Fake St", + "quantity": 20 + } + ] + } + ] + } + + + + def compare_func(x, y): + if(not isinstance(x, dict) or not isinstance(y, dict)): + raise CannotCompare + + if("id" not in x or "id" not in y): + raise CannotCompare + if(x["id"] == y["id"]): + return True + return False + + + ddiff = DeepDiff(t1, t2, iterable_compare_func=compare_func) + expected = {'dictionary_item_added': ["root['Cars'][3]['dealers']"], + 'dictionary_item_removed': ["root['Cars'][3]['production']"], + 'values_changed': {"root['Cars'][0]['dealers'][1]['quantity']": {'new_value': 50, + 'old_value': 20}, + "root['Cars'][2]['model_numbers'][2]": {'new_value': 3, 'old_value': 4}, + "root['Cars'][3]['model']": {'new_value': 'Supra', 'old_value': 'supra'}}, + 'iterable_item_added': {"root['Cars'][0]['dealers'][1]": {'id': 200, + 'address': '200 Fake St', + 'quantity': 10}, + "root['Cars'][2]['model_numbers'][3]": 4, + "root['Cars'][0]": {'id': '7', 'make': 'Toyota', 'model': '8Runner'}}, + 'iterable_item_removed': {"root['Cars'][0]['dealers'][0]": {'id': 103, + 'address': '103 Fake St', + 'quantity': 50}, + "root['Cars'][1]": {'id': '2', 
+ 'make': 'Toyota', + 'model': 'Highlander', + 'dealers': [ + {'id': 123, 'address': '123 Fake St', + 'quantity': 50}, + {'id': 125, 'address': '125 Fake St', + 'quantity': 20}]}}, + 'iterable_item_moved': {"root['Cars'][0]": {'new_path': "root['Cars'][2]", + 'new_value': {'id': '1', + 'make': 'Toyota', + 'model': 'Camry', + 'dealers': [ + {'id': 105, 'address': '105 Fake St', + 'quantity': 50}, + {'id': 200, 'address': '200 Fake St', + 'quantity': 10}]}}, + "root['Cars'][0]['dealers'][1]": { + 'new_path': "root['Cars'][0]['dealers'][0]", + 'new_value': {'id': 105, 'address': '105 Fake St', + 'quantity': 50}}, + "root['Cars'][2]": {'new_path': "root['Cars'][1]", + 'new_value': {'id': '3', + 'make': 'Toyota', + 'model': '4Runner', + 'model_numbers': [1, 2, 3, 4]}}}} + + assert expected == ddiff + delta = Delta(ddiff) + recreated_t2 = t1 + delta + replay_diff = DeepDiff(recreated_t2, t2) + assert replay_diff.to_dict() == {} From d587cf798e4721e21868e76f36dda881d88abd5d Mon Sep 17 00:00:00 2001 From: Dustin Torres Date: Sun, 18 Apr 2021 16:45:46 -0700 Subject: [PATCH 2/2] Code review comments. 
- Use DeepHash instead of id when tracking elements - Add and cleanup unit tests - Pass in level object to compare in order to get path if desired --- conftest.py | 18 +++ deepdiff/diff.py | 101 +++++++++------ tests/fixtures/compare_func_result.json | 103 +++++++++++++++ tests/fixtures/compare_func_t1.json | 50 ++++++++ tests/fixtures/compare_func_t2.json | 49 +++++++ tests/test_delta.py | 69 +++++++++- tests/test_ignore_order.py | 162 ------------------------ 7 files changed, 347 insertions(+), 205 deletions(-) create mode 100644 tests/fixtures/compare_func_result.json create mode 100644 tests/fixtures/compare_func_t1.json create mode 100644 tests/fixtures/compare_func_t2.json diff --git a/conftest.py b/conftest.py index 71f01ca9..c07dd62f 100644 --- a/conftest.py +++ b/conftest.py @@ -62,3 +62,21 @@ def nested_b_t2(): def nested_b_result(): with open(os.path.join(FIXTURES_DIR, 'nested_b_result.json')) as the_file: return json.load(the_file) + + +@pytest.fixture(scope='class') +def compare_func_t1(): + with open(os.path.join(FIXTURES_DIR, 'compare_func_t1.json')) as the_file: + return json.load(the_file) + + +@pytest.fixture(scope='class') +def compare_func_t2(): + with open(os.path.join(FIXTURES_DIR, 'compare_func_t2.json')) as the_file: + return json.load(the_file) + + +@pytest.fixture(scope='class') +def compare_func_result(): + with open(os.path.join(FIXTURES_DIR, 'compare_func_result.json')) as the_file: + return json.load(the_file) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 0d62252e..debe1e99 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -588,12 +588,25 @@ def _get_matching_pairs(self, level): try: matches = [] y_matched = set() + y_index_matched = set() for i, x in enumerate(level.t1): x_found = False for j, y in enumerate(level.t2): - if(self.iterable_compare_func(x, y)): - y_matched.add(id(y)) + if(j in y_index_matched): + # This ensures a one-to-one relationship of matches from t1 to t2. 
+ # If y this index in t2 has already been matched to another x + # it cannot have another match, so just continue. + continue + + if(self.iterable_compare_func(x, y, level)): + deep_hash = DeepHash(y, + hashes=self.hashes, + apply_hash=True, + **self.deephash_parameters, + ) + y_index_matched.add(j) + y_matched.add(deep_hash[y]) matches.append(((i, j), (x, y))) x_found = True break @@ -601,7 +614,13 @@ def _get_matching_pairs(self, level): if(not x_found): matches.append(((i, -1), (x, ListItemRemovedOrAdded))) for j, y in enumerate(level.t2): - if(id(y) not in y_matched): + + deep_hash = DeepHash(y, + hashes=self.hashes, + apply_hash=True, + **self.deephash_parameters, + ) + if(deep_hash[y] not in y_matched): matches.append(((-1, j), (ListItemRemovedOrAdded, y))) return matches except CannotCompare: @@ -616,53 +635,51 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type else: child_relationship_class = NonSubscriptableIterableRelationship - - for (i, j), (x, y) in self._get_matching_pairs(level): - if self._count_diff() is StopIteration: - return # pragma: no cover. This is already covered for addition. + if self._count_diff() is StopIteration: + return # pragma: no cover. This is already covered for addition. 
- if y is ListItemRemovedOrAdded: # item removed completely - change_level = level.branch_deeper( - x, - notpresent, - child_relationship_class=child_relationship_class, - child_relationship_param=i) - self._report_result('iterable_item_removed', change_level) + if y is ListItemRemovedOrAdded: # item removed completely + change_level = level.branch_deeper( + x, + notpresent, + child_relationship_class=child_relationship_class, + child_relationship_param=i) + self._report_result('iterable_item_removed', change_level) - elif x is ListItemRemovedOrAdded: # new item added - change_level = level.branch_deeper( - notpresent, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=j) - self._report_result('iterable_item_added', change_level) + elif x is ListItemRemovedOrAdded: # new item added + change_level = level.branch_deeper( + notpresent, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=j) + self._report_result('iterable_item_added', change_level) - else: # check if item value has changed - - if (i != j): - # Item moved - change_level = level.branch_deeper( - x, - y, - child_relationship_class=child_relationship_class, - child_relationship_param=i, - child_relationship_param2=j - ) - self._report_result('iterable_item_moved', change_level) - - item_id = id(x) - if parents_ids and item_id in parents_ids: - continue - parents_ids_added = add_to_frozen_set(parents_ids, item_id) + else: # check if item value has changed - # Go one level deeper - next_level = level.branch_deeper( + if (i != j): + # Item moved + change_level = level.branch_deeper( x, y, child_relationship_class=child_relationship_class, - child_relationship_param=i) - self._diff(next_level, parents_ids_added) + child_relationship_param=i, + child_relationship_param2=j + ) + self._report_result('iterable_item_moved', change_level) + + item_id = id(x) + if parents_ids and item_id in parents_ids: + continue + parents_ids_added = 
add_to_frozen_set(parents_ids, item_id) + + # Go one level deeper + next_level = level.branch_deeper( + x, + y, + child_relationship_class=child_relationship_class, + child_relationship_param=i) + self._diff(next_level, parents_ids_added) def _diff_str(self, level): """Compare strings""" diff --git a/tests/fixtures/compare_func_result.json b/tests/fixtures/compare_func_result.json new file mode 100644 index 00000000..d3874f7a --- /dev/null +++ b/tests/fixtures/compare_func_result.json @@ -0,0 +1,103 @@ +{ + "dictionary_item_added": [ + "root['Cars'][3]['dealers']" + ], + "dictionary_item_removed": [ + "root['Cars'][3]['production']" + ], + "values_changed": { + "root['Cars'][0]['dealers'][1]['quantity']": { + "new_value": 50, + "old_value": 20 + }, + "root['Cars'][2]['model_numbers'][2]": { + "new_value": 3, + "old_value": 4 + }, + "root['Cars'][3]['model']": { + "new_value": "Supra", + "old_value": "supra" + } + }, + "iterable_item_added": { + "root['Cars'][0]['dealers'][1]": { + "id": 200, + "address": "200 Fake St", + "quantity": 10 + }, + "root['Cars'][2]['model_numbers'][3]": 4, + "root['Cars'][0]": { + "id": "7", + "make": "Toyota", + "model": "8Runner" + } + }, + "iterable_item_removed": { + "root['Cars'][0]['dealers'][0]": { + "id": 103, + "address": "103 Fake St", + "quantity": 50 + }, + "root['Cars'][1]": { + "id": "2", + "make": "Toyota", + "model": "Highlander", + "dealers": [ + { + "id": 123, + "address": "123 Fake St", + "quantity": 50 + }, + { + "id": 125, + "address": "125 Fake St", + "quantity": 20 + } + ] + } + }, + "iterable_item_moved": { + "root['Cars'][0]": { + "new_path": "root['Cars'][2]", + "new_value": { + "id": "1", + "make": "Toyota", + "model": "Camry", + "dealers": [ + { + "id": 105, + "address": "105 Fake St", + "quantity": 50 + }, + { + "id": 200, + "address": "200 Fake St", + "quantity": 10 + } + ] + } + }, + "root['Cars'][0]['dealers'][1]": { + "new_path": "root['Cars'][0]['dealers'][0]", + "new_value": { + "id": 105, + "address": 
"105 Fake St", + "quantity": 50 + } + }, + "root['Cars'][2]": { + "new_path": "root['Cars'][1]", + "new_value": { + "id": "3", + "make": "Toyota", + "model": "4Runner", + "model_numbers": [ + 1, + 2, + 3, + 4 + ] + } + } + } +} diff --git a/tests/fixtures/compare_func_t1.json b/tests/fixtures/compare_func_t1.json new file mode 100644 index 00000000..fd4fd0c1 --- /dev/null +++ b/tests/fixtures/compare_func_t1.json @@ -0,0 +1,50 @@ +{ + "Cars": [ + { + "id": "1", + "make": "Toyota", + "model": "Camry", + "dealers": [ + { + "id": 103, + "address": "103 Fake St", + "quantity": 50 + }, + { + "id": 105, + "address": "105 Fake St", + "quantity": 20 + } + ] + }, + { + "id": "2", + "make": "Toyota", + "model": "Highlander", + "dealers": [ + { + "id": 123, + "address": "123 Fake St", + "quantity": 50 + }, + { + "id": 125, + "address": "125 Fake St", + "quantity": 20 + } + ] + }, + { + "id": "3", + "make": "Toyota", + "model": "4Runner", + "model_numbers": [1, 2, 4] + }, + { + "id": "4", + "make": "Toyota", + "model": "supra", + "production": false + } + ] +} diff --git a/tests/fixtures/compare_func_t2.json b/tests/fixtures/compare_func_t2.json new file mode 100644 index 00000000..3e7f4c3a --- /dev/null +++ b/tests/fixtures/compare_func_t2.json @@ -0,0 +1,49 @@ +{ + "Cars": [ + { + "id": "7", + "make": "Toyota", + "model": "8Runner" + }, + { + "id": "3", + "make": "Toyota", + "model": "4Runner", + "model_numbers": [1, 2, 3, 4] + }, + { + "id": "1", + "make": "Toyota", + "model": "Camry", + "dealers": [ + { + "id": 105, + "address": "105 Fake St", + "quantity": 50 + }, + { + "id": 200, + "address": "200 Fake St", + "quantity": 10 + } + ] + }, + { + "id": "4", + "make": "Toyota", + "model": "Supra", + "dealers": [ + { + "id": 123, + "address": "123 Fake St", + "quantity": 50 + }, + { + "id": 125, + "address": "125 Fake St", + "quantity": 20 + } + ] + } + ] +} diff --git a/tests/test_delta.py b/tests/test_delta.py index af5051f9..ad8db0d0 100644 --- a/tests/test_delta.py +++ 
b/tests/test_delta.py @@ -5,7 +5,7 @@ from decimal import Decimal from unittest import mock from deepdiff import Delta, DeepDiff -from deepdiff.helper import np, number_to_string, TEXT_VIEW, DELTA_VIEW +from deepdiff.helper import np, number_to_string, TEXT_VIEW, DELTA_VIEW, CannotCompare from deepdiff.path import GETATTR, GET from deepdiff.delta import ( ELEM_NOT_FOUND_TO_ADD_MSG, @@ -1330,3 +1330,70 @@ def test_delta_with_json_serializer(self): delta_reloaded_again = Delta(delta_file=the_file, deserializer=json.loads) assert t2 == delta_reloaded_again + t1 + + +class TestDeltaCompareFunc: + + @staticmethod + def compare_func(x, y, level): + if (not isinstance(x, dict) or not isinstance(y, dict)): + raise CannotCompare + if(level.path() == "root['path2']"): + if (x["ID"] == y["ID"]): + return True + return False + + if("id" in x and "id" in y): + if (x["id"] == y["id"]): + return True + return False + + raise CannotCompare + + def test_pass(self, compare_func_t1, compare_func_t2, compare_func_result): + + ddiff = DeepDiff(compare_func_t1, compare_func_t2, iterable_compare_func=self.compare_func) + assert compare_func_result == ddiff + delta = Delta(ddiff) + recreated_t2 = compare_func_t1 + delta + assert compare_func_t2 == recreated_t2 + + def test_compare_func_with_duplicates_removed(self): + t1 = [{'id': 1, 'val': 1}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}, {'id': 3, 'val': 3}] + t2 = [{'id': 3, 'val': 3}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}] + ddiff = DeepDiff(t1, t2, iterable_compare_func=self.compare_func) + expected = {'values_changed': {"root[0]['val']": {'new_value': 3, 'old_value': 1}}, 'iterable_item_removed': {'root[2]': {'id': 1, 'val': 3}}, 'iterable_item_moved': {'root[0]': {'new_path': 'root[2]', 'new_value': {'id': 1, 'val': 3}}, 'root[3]': {'new_path': 'root[0]', 'new_value': {'id': 3, 'val': 3}}}} + assert expected == ddiff + delta = Delta(ddiff) + recreated_t2 = t1 + delta + assert t2 == recreated_t2 + + def 
test_compare_func_with_duplicates_added(self): + t1 = [{'id': 3, 'val': 3}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}] + t2 = [{'id': 1, 'val': 1}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}, {'id': 3, 'val': 3}] + ddiff = DeepDiff(t1, t2, iterable_compare_func=self.compare_func) + expected = {'values_changed': {"root[2]['val']": {'new_value': 1, 'old_value': 3}}, 'iterable_item_added': {'root[2]': {'id': 1, 'val': 3}}, 'iterable_item_moved': {'root[2]': {'new_path': 'root[0]', 'new_value': {'id': 1, 'val': 1}}, 'root[0]': {'new_path': 'root[3]', 'new_value': {'id': 3, 'val': 3}}}} + assert expected == ddiff + delta = Delta(ddiff) + recreated_t2 = t1 + delta + assert t2 == recreated_t2 + + def test_compare_func_swap(self): + t1 = [{'id': 1, 'val': 1}, {'id': 1, 'val': 3}] + t2 = [{'id': 1, 'val': 3}, {'id': 1, 'val': 1}] + ddiff = DeepDiff(t1, t2, iterable_compare_func=self.compare_func) + expected = {'values_changed': {"root[0]['val']": {'new_value': 3, 'old_value': 1}, "root[1]['val']": {'new_value': 1, 'old_value': 3}}} + assert expected == ddiff + delta = Delta(ddiff) + recreated_t2 = t1 + delta + assert t2 == recreated_t2 + + def test_compare_func_path_specific(self): + t1 = {"path1": [{'id': 1, 'val': 1}, {'id': 2, 'val': 3}], "path2": [{'ID': 4, 'val': 3}, {'ID': 3, 'val': 1}, ], "path3": [{'no_id': 5, 'val': 1}, {'no_id': 6, 'val': 3}]} + t2 = {"path1": [{'id': 1, 'val': 1}, {'id': 2, 'val': 3}], "path2": [{'ID': 3, 'val': 1}, {'ID': 4, 'val': 3}], "path3": [{'no_id': 5, 'val': 1}, {'no_id': 6, 'val': 3}]} + ddiff = DeepDiff(t1, t2, iterable_compare_func=self.compare_func) + expected = {'iterable_item_moved': {"root['path2'][0]": {'new_path': "root['path2'][1]", 'new_value': {'ID': 4, 'val': 3}},"root['path2'][1]": {'new_path': "root['path2'][0]", 'new_value': {'ID': 3, 'val': 1}}}} + assert expected == ddiff + delta = Delta(ddiff) + recreated_t2 = t1 + delta + assert t2 == recreated_t2 diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 
14123b60..791bc9db 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -780,165 +780,3 @@ def test_ignore_order_and_group_by(self): expected2 = {'iterable_item_added': {"root['BB']['ate'][1]": 'Brownies'}} assert expected2 == diff2 - def test_compare_func(self): - t1 = { - "Cars": [ - { - "id": "1", - "make": "Toyota", - "model": "Camry", - "dealers": [ - { - "id": 103, - "address": "103 Fake St", - "quantity": 50 - }, - { - "id": 105, - "address": "105 Fake St", - "quantity": 20 - } - ] - }, - { - "id": "2", - "make": "Toyota", - "model": "Highlander", - "dealers": [ - { - "id": 123, - "address": "123 Fake St", - "quantity": 50 - }, - { - "id": 125, - "address": "125 Fake St", - "quantity": 20 - } - ] - }, - { - "id": "3", - "make": "Toyota", - "model": "4Runner", - "model_numbers": [1, 2, 4] - }, - { - "id": "4", - "make": "Toyota", - "model": "supra", - "production": False - } - ] - } - - t2 = { - "Cars": [ - { - "id": "7", - "make": "Toyota", - "model": "8Runner" - }, - { - "id": "3", - "make": "Toyota", - "model": "4Runner", - "model_numbers": [1, 2, 3, 4] - }, - { - "id": "1", - "make": "Toyota", - "model": "Camry", - "dealers": [ - { - "id": 105, - "address": "105 Fake St", - "quantity": 50 - }, - { - "id": 200, - "address": "200 Fake St", - "quantity": 10 - } - ] - }, - { - "id": "4", - "make": "Toyota", - "model": "Supra", - "dealers": [ - { - "id": 123, - "address": "123 Fake St", - "quantity": 50 - }, - { - "id": 125, - "address": "125 Fake St", - "quantity": 20 - } - ] - } - ] - } - - - - def compare_func(x, y): - if(not isinstance(x, dict) or not isinstance(y, dict)): - raise CannotCompare - - if("id" not in x or "id" not in y): - raise CannotCompare - if(x["id"] == y["id"]): - return True - return False - - - ddiff = DeepDiff(t1, t2, iterable_compare_func=compare_func) - expected = {'dictionary_item_added': ["root['Cars'][3]['dealers']"], - 'dictionary_item_removed': ["root['Cars'][3]['production']"], - 'values_changed': 
{"root['Cars'][0]['dealers'][1]['quantity']": {'new_value': 50, - 'old_value': 20}, - "root['Cars'][2]['model_numbers'][2]": {'new_value': 3, 'old_value': 4}, - "root['Cars'][3]['model']": {'new_value': 'Supra', 'old_value': 'supra'}}, - 'iterable_item_added': {"root['Cars'][0]['dealers'][1]": {'id': 200, - 'address': '200 Fake St', - 'quantity': 10}, - "root['Cars'][2]['model_numbers'][3]": 4, - "root['Cars'][0]": {'id': '7', 'make': 'Toyota', 'model': '8Runner'}}, - 'iterable_item_removed': {"root['Cars'][0]['dealers'][0]": {'id': 103, - 'address': '103 Fake St', - 'quantity': 50}, - "root['Cars'][1]": {'id': '2', - 'make': 'Toyota', - 'model': 'Highlander', - 'dealers': [ - {'id': 123, 'address': '123 Fake St', - 'quantity': 50}, - {'id': 125, 'address': '125 Fake St', - 'quantity': 20}]}}, - 'iterable_item_moved': {"root['Cars'][0]": {'new_path': "root['Cars'][2]", - 'new_value': {'id': '1', - 'make': 'Toyota', - 'model': 'Camry', - 'dealers': [ - {'id': 105, 'address': '105 Fake St', - 'quantity': 50}, - {'id': 200, 'address': '200 Fake St', - 'quantity': 10}]}}, - "root['Cars'][0]['dealers'][1]": { - 'new_path': "root['Cars'][0]['dealers'][0]", - 'new_value': {'id': 105, 'address': '105 Fake St', - 'quantity': 50}}, - "root['Cars'][2]": {'new_path': "root['Cars'][1]", - 'new_value': {'id': '3', - 'make': 'Toyota', - 'model': '4Runner', - 'model_numbers': [1, 2, 3, 4]}}}} - - assert expected == ddiff - delta = Delta(ddiff) - recreated_t2 = t1 + delta - replay_diff = DeepDiff(recreated_t2, t2) - assert replay_diff.to_dict() == {}