From 5ce1de7adb06dbe96fdfefb8dbff3f5546126f27 Mon Sep 17 00:00:00 2001 From: Harald Nezbeda Date: Mon, 14 Jun 2021 12:17:02 +0200 Subject: [PATCH 1/4] Julian/jsonschema#686: Fixes unique and equal checks --- jsonschema/_utils.py | 48 +++++- .../tests/test_jsonschema_test_suite.py | 160 ------------------ 2 files changed, 46 insertions(+), 162 deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index 21d8931e7..2b46d100e 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -158,10 +158,46 @@ def ensure_list(thing): return thing +def dict_equal(one, two): + """ + Check if two dicts are the same using `equal` + """ + if len(one.keys()) != len(two.keys()): + return False + + for key in one: + if key not in two: + return False + if not equal(one[key], two[key]): + return False + + return True + + +def list_equal(one, two): + """ + Check if two lists are the same using `equal` + """ + if len(one) != len(two): + return False + + for i in range(0, len(one)): + if not equal(one[i], two[i]): + return False + + return True + + def equal(one, two): """ Check if two things are equal, but evade booleans and ints being equal. 
""" + if isinstance(one, list) and isinstance(two, list): + return list_equal(one, two) + + if isinstance(one, dict) and isinstance(two, dict): + return dict_equal(one, two) + return unbool(one) == unbool(two) @@ -192,14 +228,22 @@ def uniq(container): try: sort = sorted(unbool(i) for i in container) sliced = itertools.islice(sort, 1, None) + for i, j in zip(sort, sliced): + if isinstance(i, list) and isinstance(j, list): + return not list_equal(i, j) if i == j: return False + except (NotImplementedError, TypeError): seen = [] for e in container: e = unbool(e) - if e in seen: - return False + + for i in seen: + if isinstance(i, dict) and isinstance(e, dict): + if dict_equal(i, e): + return False + seen.append(e) return True diff --git a/jsonschema/tests/test_jsonschema_test_suite.py b/jsonschema/tests/test_jsonschema_test_suite.py index 7d77bd400..5725d328d 100644 --- a/jsonschema/tests/test_jsonschema_test_suite.py +++ b/jsonschema/tests/test_jsonschema_test_suite.py @@ -145,36 +145,6 @@ def leap_second(test): "$ref prevents a sibling $id from changing the base uri" ), )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": false} and {"a": 0} are unique', - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": true} and {"a": 1} are unique', - )(test) ), ) @@ -240,36 +210,6 @@ def leap_second(test): subject="refRemote", case_description="base URI change - change folder in subschema", )(test) - or skip( - message=bug(686), - subject="uniqueItems", - 
description="[0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": false} and {"a": 0} are unique', - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": true} and {"a": 1} are unique', - )(test) ), ) @@ -351,56 +291,6 @@ def leap_second(test): subject="refRemote", case_description="base URI change - change folder in subschema", )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": false} and {"a": 0} are unique', - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": true} and {"a": 1} are unique', - )(test) - or skip( - message=bug(686), - subject="const", - case_description="const with [false] does not match [0]", - )(test) - or skip( - message=bug(686), - subject="const", - case_description="const with [true] does not match [1]", - )(test) - or skip( - message=bug(686), - subject="const", - case_description='const with {"a": false} does not match {"a": 0}', - )(test) - or skip( - message=bug(686), - subject="const", - case_description='const with {"a": true} does not match {"a": 1}', - )(test) ), ) @@ -506,55 +396,5 @@ def 
leap_second(test): "validation of binary-encoded media type documents" ), )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="[1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [0] and [false] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description="nested [1] and [true] are unique", - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": false} and {"a": 0} are unique', - )(test) - or skip( - message=bug(686), - subject="uniqueItems", - description='{"a": true} and {"a": 1} are unique', - )(test) - or skip( - message=bug(686), - subject="const", - case_description="const with [false] does not match [0]", - )(test) - or skip( - message=bug(686), - subject="const", - case_description="const with [true] does not match [1]", - )(test) - or skip( - message=bug(686), - subject="const", - case_description='const with {"a": false} does not match {"a": 0}', - )(test) - or skip( - message=bug(686), - subject="const", - case_description='const with {"a": true} does not match {"a": 1}', - )(test) ), ) From 3e5781df75e3261fc8ea2c6c92f6881a7ba6c9c9 Mon Sep 17 00:00:00 2001 From: Harald Nezbeda Date: Mon, 28 Jun 2021 18:26:44 +0200 Subject: [PATCH 2/4] Add test for list_equal and dict_equal --- jsonschema/tests/test_utils.py | 135 +++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 jsonschema/tests/test_utils.py diff --git a/jsonschema/tests/test_utils.py b/jsonschema/tests/test_utils.py new file mode 100644 index 000000000..9efadd6d2 --- /dev/null +++ b/jsonschema/tests/test_utils.py @@ -0,0 +1,135 @@ +from unittest import TestCase + +from jsonschema._utils import dict_equal, list_equal + + +class TestDictEqual(TestCase): + + def test_equal_dictionaries(self): + dict_1 = {"a": 
"b", "c": "d"} + dict_2 = {"c": "d", "a": "b"} + self.assertTrue(dict_equal(dict_1, dict_2)) + + def test_missing_key(self): + dict_1 = {"a": "b", "c": "d"} + dict_2 = {"c": "d", "x": "b"} + self.assertFalse(dict_equal(dict_1, dict_2)) + + def test_additional_key(self): + dict_1 = {"a": "b", "c": "d"} + dict_2 = {"c": "d", "a": "b", "x": "x"} + self.assertFalse(dict_equal(dict_1, dict_2)) + + def test_missing_value(self): + dict_1 = {"a": "b", "c": "d"} + dict_2 = {"c": "d", "a": "x"} + self.assertFalse(dict_equal(dict_1, dict_2)) + + def test_empty_dictionaries(self): + dict_1 = {} + dict_2 = {} + self.assertTrue(dict_equal(dict_1, dict_2)) + + def test_one_none(self): + dict_1 = None + dict_2 = {"a": "b", "c": "d"} + with self.assertRaises(AttributeError): + self.assertFalse(dict_equal(dict_1, dict_2)) + with self.assertRaises(AttributeError): + self.assertFalse(dict_equal(dict_1, dict_2)) + + def test_both_none(self): + with self.assertRaises(AttributeError): + self.assertFalse(dict_equal(None, None)) + + def test_same_item(self): + dict_1 = {"a": "b", "c": "d"} + self.assertTrue(dict_equal(dict_1, dict_1)) + + def test_nested_dict_equal(self): + dict_1 = {"a": {"a": "b", "c": "d"}, "c": "d"} + dict_2 = {"c": "d", "a": {"a": "b", "c": "d"}} + self.assertTrue(dict_equal(dict_1, dict_2)) + + def test_nested_dict_unequal(self): + dict_1 = {"a": {"a": "b", "c": "d"}, "c": "d"} + dict_2 = {"c": "d", "a": {"a": "b", "c": "x"}} + self.assertFalse(dict_equal(dict_1, dict_2)) + + def test_nested_list_equal(self): + dict_1 = {"a": ["a", "b", "c", "d"], "c": "d"} + dict_2 = {"c": "d", "a": ["a", "b", "c", "d"]} + self.assertTrue(dict_equal(dict_1, dict_2)) + + def test_nested_list_unequal(self): + dict_1 = {"a": ["a", "b", "c", "d"], "c": "d"} + dict_2 = {"c": "d", "a": ["b", "c", "d", "a"]} + self.assertFalse(dict_equal(dict_1, dict_2)) + + +class TestListEqual(TestCase): + + def test_equal_lists(self): + list_1 = ["a", "b", "c"] + list_2 = ["a", "b", "c"] + 
self.assertTrue(list_equal(list_1, list_2)) + + def test_unsorted_lists(self): + list_1 = ["a", "b", "c"] + list_2 = ["b", "b", "a"] + self.assertFalse(list_equal(list_1, list_2)) + + def test_first_list_larger(self): + list_1 = ["a", "b", "c"] + list_2 = ["a", "b"] + self.assertFalse(list_equal(list_1, list_2)) + + def test_second_list_larger(self): + list_1 = ["a", "b"] + list_2 = ["a", "b", "c"] + self.assertFalse(list_equal(list_1, list_2)) + + def test_list_with_none_unequal(self): + list_1 = ["a", "b", None] + list_2 = ["a", "b", "c"] + self.assertFalse(list_equal(list_1, list_2)) + + list_1 = ["a", "b", None] + list_2 = [None, "b", "c"] + self.assertFalse(list_equal(list_1, list_2)) + + def test_list_with_none_equal(self): + list_1 = ["a", None, "c"] + list_2 = ["a", None, "c"] + self.assertTrue(list_equal(list_1, list_2)) + + def test_empty_list(self): + list_1 = [] + list_2 = [] + self.assertTrue(list_equal(list_1, list_2)) + + def test_one_none(self): + list_1 = None + list_2 = [] + with self.assertRaises(TypeError): + self.assertTrue(list_equal(list_1, list_2)) + + def test_both_none(self): + list_1 = None + list_2 = None + with self.assertRaises(TypeError): + self.assertTrue(list_equal(list_1, list_2)) + + def test_same_list(self): + list_1 = ["a", "b", "c"] + self.assertTrue(list_equal(list_1, list_1)) + + def test_equal_nested_lists(self): + list_1 = ["a", ["b", "c"], "d"] + list_2 = ["a", ["b", "c"], "d"] + self.assertTrue(list_equal(list_1, list_2)) + + def test_unequal_nested_lists(self): + list_1 = ["a", ["b", "c"], "d"] + list_2 = ["a", [], "c"] + self.assertFalse(list_equal(list_1, list_2)) From 4ef44a74d3986375eba6a7c118545f430f807e22 Mon Sep 17 00:00:00 2001 From: Harald Nezbeda Date: Tue, 29 Jun 2021 17:59:28 +0200 Subject: [PATCH 3/4] Add test case with custom sequence type --- jsonschema/_utils.py | 44 +++++++++++++---------------- jsonschema/tests/test_validators.py | 38 ++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 26
deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index 2b46d100e..e7fdb3e77 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -217,33 +217,27 @@ def uniq(container): """ Check if all of a container's elements are unique. - Successively tries first to rely that the elements are hashable, then - falls back on them being sortable, and finally falls back on brute - force. + Successively tries first to rely that the elements are being sortable + and finally falls back on brute force. """ - try: - return len(set(unbool(i) for i in container)) == len(container) - except TypeError: - try: - sort = sorted(unbool(i) for i in container) - sliced = itertools.islice(sort, 1, None) - - for i, j in zip(sort, sliced): - if isinstance(i, list) and isinstance(j, list): - return not list_equal(i, j) - if i == j: + sort = sorted(unbool(i) for i in container) + sliced = itertools.islice(sort, 1, None) + + for i, j in zip(sort, sliced): + return not list_equal(list(i), list(j)) + + except (NotImplementedError, TypeError): + seen = [] + for e in container: + e = unbool(e) + + for i in seen: + if isinstance(i, dict) and isinstance(e, dict): + if dict_equal(i, e): + return False + elif i == e: return False - except (NotImplementedError, TypeError): - seen = [] - for e in container: - e = unbool(e) - - for i in seen: - if isinstance(i, dict) and isinstance(e, dict): - if dict_equal(i, e): - return False - - seen.append(e) + seen.append(e) return True diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 1fbcc3f0c..674eeaace 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1,4 +1,4 @@ -from collections import deque +from collections import deque, namedtuple from contextlib import contextmanager from decimal import Decimal from io import BytesIO @@ -1088,6 +1088,42 @@ def test_it_properly_formats_tuples_in_errors(self): TupleValidator({"uniqueItems": True}).validate((1, 1)) 
self.assertIn("(1, 1) has non-unique elements", str(e.exception)) + def test_check_redefined_sequence(self): + """ + Allow array to validate against another defined sequence type + """ + schema = { + "type": "array", + "uniqueItems": True + } + + MyMapping = namedtuple('MyMapping', 'a, b') + + Validator = validators.extend( + self.Validator, + type_checker=self.Validator.TYPE_CHECKER.redefine( + "array", + lambda checker, thing: isinstance(thing, (list, deque)), + ) + ) + + validator = Validator(schema) + validator.validate(deque(['a', None, '1', '', True])) + with self.assertRaises(exceptions.ValidationError): + validator.validate(deque(['a', 'b', 'a'])) + + validator.validate(deque([[False], [0]])) + with self.assertRaises(exceptions.ValidationError): + validator.validate(deque([[False], [False]])) + + validator.validate([deque([False]), deque([0])]) + with self.assertRaises(exceptions.ValidationError): + validator.validate([deque([False]), deque([False])]) + + validator.validate([MyMapping('a', 0), MyMapping('a', False)]) + with self.assertRaises(exceptions.ValidationError): + validator.validate([MyMapping('a', False), MyMapping('a', False)]) + class AntiDraft6LeakMixin(object): """ From 27661b2f9460c3c9d94b078884111ed911bf8e53 Mon Sep 17 00:00:00 2001 From: Harald Nezbeda Date: Fri, 2 Jul 2021 13:20:19 +0200 Subject: [PATCH 4/4] Extend sequence and mapping check --- jsonschema/_utils.py | 30 ++++++++--- jsonschema/tests/test_validators.py | 77 ++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 26 deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index e7fdb3e77..6ff629f64 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -1,5 +1,6 @@ from collections.abc import MutableMapping from urllib.parse import urlsplit +import collections import itertools import json import pkgutil @@ -188,14 +189,32 @@ def list_equal(one, two): return True +def is_sequence(instance): + """ + Checks if an instance is a sequence but not a
string + """ + return isinstance( + instance, collections.Sequence + ) and not isinstance( + instance, str + ) + + +def is_mapping(instance): + """ + Checks if an instance is a mapping + """ + return isinstance(instance, collections.Mapping) + + def equal(one, two): """ Check if two things are equal, but evade booleans and ints being equal. """ - if isinstance(one, list) and isinstance(two, list): + if is_sequence(one) and is_sequence(two): return list_equal(one, two) - if isinstance(one, dict) and isinstance(two, dict): + if is_mapping(one) and is_mapping(two): return dict_equal(one, two) return unbool(one) == unbool(two) @@ -225,7 +244,7 @@ def uniq(container): sliced = itertools.islice(sort, 1, None) for i, j in zip(sort, sliced): - return not list_equal(list(i), list(j)) + return not list_equal(i, j) except (NotImplementedError, TypeError): seen = [] @@ -233,10 +252,7 @@ def uniq(container): e = unbool(e) for i in seen: - if isinstance(i, dict) and isinstance(e, dict): - if dict_equal(i, e): - return False - elif i == e: + if equal(i, e): return False seen.append(e) diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 674eeaace..af5dfebce 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1096,33 +1096,72 @@ def test_check_redefined_sequence(self): "type": "array", "uniqueItems": True } - MyMapping = namedtuple('MyMapping', 'a, b') - Validator = validators.extend( self.Validator, - type_checker=self.Validator.TYPE_CHECKER.redefine( - "array", - lambda checker, thing: isinstance(thing, (list, deque)), - ) + type_checker=self.Validator.TYPE_CHECKER.redefine_many({ + "array": lambda checker, thing: isinstance( + thing, (list, deque) + ), + "object": lambda checker, thing: isinstance( + thing, (dict, MyMapping) + ), + }) ) - validator = Validator(schema) - validator.validate(deque(['a', None, '1', '', True])) - with self.assertRaises(exceptions.ValidationError): - 
validator.validate(deque(['a', 'b', 'a'])) - validator.validate(deque([[False], [0]])) - with self.assertRaises(exceptions.ValidationError): - validator.validate(deque([[False], [False]])) + valid_instances = [ + deque(['a', None, '1', '', True]), + deque([[False], [0]]), + [deque([False]), deque([0])], + [[deque([False])], [deque([0])]], + [[[[[deque([False])]]]], [[[[deque([0])]]]]], + [deque([deque([False])]), deque([deque([0])])], + [MyMapping('a', 0), MyMapping('a', False)], + [ + MyMapping('a', [deque([0])]), + MyMapping('a', [deque([False])]) + ], + [ + MyMapping('a', [ + MyMapping('a', deque([0])) + ]), + MyMapping('a', [ + MyMapping('a', deque([False])) + ]) + ], + [deque(deque(deque([False]))), deque(deque(deque([0])))], + ] - validator.validate([deque([False]), deque([0])]) - with self.assertRaises(exceptions.ValidationError): - validator.validate([deque([False]), deque([False])]) + for instance in valid_instances: + validator.validate(instance) + + invalid_instances = [ + deque(['a', 'b', 'a']), + deque([[False], [False]]), + [deque([False]), deque([False])], + [[deque([False])], [deque([False])]], + [[[[[deque([False])]]]], [[[[deque([False])]]]]], + [deque([deque([False])]), deque([deque([False])])], + [MyMapping('a', False), MyMapping('a', False)], + [ + MyMapping('a', [deque([False])]), + MyMapping('a', [deque([False])]) + ], + [ + MyMapping('a', [ + MyMapping('a', deque([False])) + ]), + MyMapping('a', [ + MyMapping('a', deque([False])) + ]) + ], + [deque(deque(deque([False]))), deque(deque(deque([False])))], + ] - validator.validate([MyMapping('a', 0), MyMapping('a', False)]) - with self.assertRaises(exceptions.ValidationError): - validator.validate([MyMapping('a', False), MyMapping('a', False)]) + for instance in invalid_instances: + with self.assertRaises(exceptions.ValidationError): + validator.validate(instance) class AntiDraft6LeakMixin(object):