Skip to content

Enhancing by_relevance / best_match (PR for issue 728) #753

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
67 changes: 65 additions & 2 deletions jsonschema/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,46 @@ def total_errors(self):
child_errors = sum(len(tree) for _, tree in self._contents.items())
return len(self.errors) + child_errors

def get_instance_type(value):
"""
Returns the type to use in type comparaisons of values' types

Arguments:
value (object):
an object from the schema or the instance

Returns (type):
the type of the object passed consistent with other JSON types
"""
value_type = type(value)
if value_type in [int, float]:
return float
elif value_type in [bool, str, list, dict]:
return value_type
return None

def get_schema_type(value):
"""
Returns the type of the validation rule as a python type

Arguments:
value (string):
One of the values : ['string', 'array', 'object', 'number', None]

Returns (type):
One of the values : [str, list, dict, float, None]
"""
if value == 'string':
return str
elif value == 'number':
return float
elif value == 'object':
return dict
elif value == 'array':
return list
else:
return None


def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
"""
Expand All @@ -301,8 +341,31 @@ def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
a collection of validator names to consider to be "strong"
"""
def relevance(error):
validator = error.validator
return -len(error.path), validator not in weak, validator in strong
same_type = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So! Thanks again for trying to tackle this.

I still want to go for something a lot simpler than the code here. Maybe this will help as a start -- I think the changes to this file and this function probably should be just something like:

        same_type = False
        if "type" in error.schema:
            same_type = error._type_checker.is_type(error.instance, error.schema["type"])
        validator = error.validator
        return (
            -len(error.path),
            error.validator not in weak,
            error.validator in strong,
            not same_type,
        )

The missing piece there is that the type checker that's used is not in fact carried onto Errors at the minute, but that should be 1-2 lines to add.

If you want to see what that does, you can try instead starting with:

        same_type = False
        if "type" in error.schema:
            same_type = _types.draft7_type_checker.is_type(error.instance, error.schema["type"])
        validator = error.validator
        return (
            -len(error.path),
            error.validator not in weak,
            error.validator in strong,
            not same_type,
        )

And looking to see what it does to your test cases. From here it looks like it passes your first new one (which was the one that #728 was mainly targeting).

It fails the other two, but I'm not 100% convinced on the other two personally. If we do want to support them though, which yeah we should think about, there are other ways to handle them -- e.g. perhaps errors from non-container types (simple value types, e.g. strings, bools, etc.) should prioritize enum and const validators.

Hopefully the above is a breadcrumb to move this further a bit?

Let me know either way, and probably also worth having a look at CI to make sure it passes with whatever change, it'll be needed before we merge.

Appreciated again for staying on top of this!

# If the 'type' property has been provided within the schema rule
# set rule_type to the python <class 'type'> corresponding
if isinstance(error.schema, dict) and 'type' in error.schema:
rule_type = get_schema_type(error.schema['type'])
else:
rule_type = None

instance = error.instance
instance_type = None
instance_type = get_instance_type(instance)

if rule_type is not None:
same_type = instance_type == rule_type
elif error.validator == 'enum':
if isinstance(error.validator_value, list):
for value in error.validator_value:
same_type = get_instance_type(value) == instance_type
if same_type:
break
elif error.validator == 'const':
same_type = get_instance_type(error.validator_value) == instance_type

return -len(error.path), error.validator not in weak, error.validator in strong, -same_type

return relevance


Expand Down
83 changes: 82 additions & 1 deletion jsonschema/tests/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest import TestCase
import textwrap

from jsonschema import Draft4Validator, exceptions
from jsonschema import Draft4Validator, Draft7Validator, exceptions


class TestBestMatch(TestCase):
Expand Down Expand Up @@ -154,6 +154,87 @@ def test_no_errors(self):
validator = Draft4Validator({})
self.assertIsNone(exceptions.best_match(validator.iter_errors({})))

def test_same_type_is_prioritized(self):
# Best match to prioritize error with the same type as value regardless of order
validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"type" : "array" , "minItems" : 2},
{"type" : "string", "minLength" : 10},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "minLength")

validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"type" : "string", "minLength" : 10},
{"type" : "array" , "minItems" : 2},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "minLength")

def test_same_type_is_prioritized_const(self):
validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"type" : "array" , "minItems" : 2},
{"const" : "bar"},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "const")

validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"const" : "bar"},
{"type" : "array" , "minItems" : 2},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "const")

def test_same_type_is_prioritized_enum(self):
validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"type" : "array" , "minItems" : 2},
{"enum" : ["bar", 2.3]},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "enum")

validator = Draft7Validator({
"properties" : {
"value" : {
"anyOf" : [
{"enum" : ["bar", 2.3]},
{"type" : "array" , "minItems" : 2},
],
},
},
},
)
self.assertEqual(exceptions.best_match(validator.iter_errors({"value" : "foo"})).validator, "enum")

class TestByRelevance(TestCase):
def test_short_paths_are_better_matches(self):
Expand Down