diff --git a/CHANGES.rst b/CHANGES.rst index 89a68018..2e00ac80 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,8 +2,17 @@ Changelog ========= -0.18.2 (not yet released) -~~~~~~~~~~~~~~~~~~~~~~~~~ +0.19.0 (2021-07-20) +~~~~~~~~~~~~~~~~~~~ + +New features +------------ + ++ `#85`_: add an argument `join_specs` to the constructor of class + :class:`icat.query.Query` and a corresponding method + :meth:`icat.query.Query.setJoinSpecs` to override the join + specification to be used in the created query for selected related + objects. Bug fixes and minor changes --------------------------- @@ -18,6 +27,7 @@ Bug fixes and minor changes .. _#83: https://github.com/icatproject/python-icat/issues/83 .. _#84: https://github.com/icatproject/python-icat/pull/84 +.. _#85: https://github.com/icatproject/python-icat/pull/85 0.18.1 (2021-04-13) diff --git a/icat/dump_queries.py b/icat/dump_queries.py index 329e6e2a..22d0fee6 100644 --- a/icat/dump_queries.py +++ b/icat/dump_queries.py @@ -36,11 +36,9 @@ def getAuthQueries(client): return [ Query(client, "User", order=True), Query(client, "Grouping", order=True, includes={"userGroups", "userGroups.user"}), - Query(client, "Rule", order=["what", "id"], - conditions={"grouping": "IS NULL"}), Query(client, "Rule", order=["grouping.name", "what", "id"], - conditions={"grouping": "IS NOT NULL"}, - includes={"grouping"}), + includes={"grouping"}, + join_specs={"grouping": "LEFT JOIN"}), Query(client, "PublicStep", order=True) ] def getStaticQueries(client): diff --git a/icat/exception.py b/icat/exception.py index df37e8be..6c828315 100644 --- a/icat/exception.py +++ b/icat/exception.py @@ -308,14 +308,14 @@ class ConfigError(_BaseException): class QueryWarning(Warning): """Warning while building a query. - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 """ pass class QueryNullableOrderWarning(QueryWarning): """Warn about using a nullable many to one relation for ordering. - .. versionchanged:: 0.18.2 + .. versionchanged:: 0.19.0 Inherit from :exc:`QueryWarning`. """ def __init__(self, attr): @@ -326,7 +326,7 @@ def __init__(self, attr): class QueryOneToManyOrderWarning(QueryWarning): """Warn about using a one to many relation for ordering. - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 """ def __init__(self, attr): msg = ("ordering on a one to many relation %s may surprisingly " diff --git a/icat/query.py b/icat/query.py index 47bbf24e..e8e84eb0 100644 --- a/icat/query.py +++ b/icat/query.py @@ -2,6 +2,12 @@ """ from warnings import warn +try: + # Python 3.3 and newer + from collections.abc import Mapping +except ImportError: + # Python 2 + from collections import Mapping import icat.entity from icat.exception import * @@ -45,6 +51,16 @@ :meth:`icat.query.Query.setAggregate` method. """ +jpql_join_specs = frozenset([ + "JOIN", + "INNER JOIN", + "LEFT JOIN", + "LEFT OUTER JOIN", +]) +"""Allowed values for the `join_specs` argument to the +:meth:`icat.query.Query.setJoinSpecs` method. +""" + # ========================== class Query ============================= class Query(object): @@ -75,10 +91,15 @@ class Query(object): :param limit: a tuple (skip, count) to be used in the LIMIT clause. See the :meth:`~icat.query.Query.setLimit` method for details. + :param join_specs: a mapping to override the join specification + for selected related objects. See the + :meth:`~icat.query.Query.setJoinSpecs` method for details. :param attribute: alias for `attributes`, retained for compatibility. Deprecated, use `attributes` instead. - :raise TypeError: if `entity` is not a valid entity type or if - both `attributes` and `attribute` are provided. + :raise TypeError: if `entity` is not a valid entity type, if both + `attributes` and `attribute` are provided, or if any of the + keyword arguments have an invalid type, see the corresponding + method for details. :raise ValueError: if any of the keyword arguments is not valid, see the corresponding method for details. @@ -86,11 +107,14 @@ class Query(object): add support for queries requesting a list of attributes rather then a single one. Consequently, the keyword argument `attribute` has been renamed to `attributes` (in the plural). + .. versionchanged:: 0.19.0 + add the `join_specs` argument. """ def __init__(self, client, entity, attributes=None, aggregate=None, order=None, - conditions=None, includes=None, limit=None, attribute=None): + conditions=None, includes=None, limit=None, + join_specs=None, attribute=None): """Initialize the query. """ @@ -123,6 +147,7 @@ def __init__(self, client, entity, self.addConditions(conditions) self.includes = set() self.addIncludes(includes) + self.setJoinSpecs(join_specs) self.setOrder(order) self.setLimit(limit) self._init = None @@ -251,6 +276,39 @@ def setAggregate(self, function): else: self.aggregate = None + def setJoinSpecs(self, join_specs): + """Override the join specifications. + + :param join_specs: a mapping of related object names to join + specifications. Allowed values are "JOIN", "INNER JOIN", + "LEFT JOIN", and "LEFT OUTER JOIN". Any entry in this + mapping overrides how this particular related object is to + be joined. The default for any relation not included in + the mapping is "JOIN". A special value of :const:`None` + for `join_specs` is equivalent to the empty mapping. + :type join_specs: :class:`dict` + :raise TypeError: if `join_specs` is not a mapping. + :raise ValueError: if any key in `join_specs` is not a name of + a related object or if any value is not in the allowed + set. + + .. versionadded:: 0.19.0 + """ + if join_specs: + if not isinstance(join_specs, Mapping): + raise TypeError("join_specs must be a mapping") + for obj, js in join_specs.items(): + for (pattr, attrInfo, rclass) in self._attrpath(obj): + pass + if rclass is None: + raise ValueError("%s.%s is not a related object" + % (self.entity.BeanName, obj)) + if js not in jpql_join_specs: + raise ValueError("invalid join specification %s" % js) + self.join_specs = join_specs + else: + self.join_specs = dict() + def setOrder(self, order): """Set the order to build the ORDER BY clause from. @@ -264,7 +322,7 @@ def setOrder(self, order): :type order: iterable or :class:`bool` :raise ValueError: if any attribute in `order` is not valid. - .. versionchanged:: 0.18.2 + .. versionchanged:: 0.19.0 allow one to many relationships in `order`. Emit a :exc:`~icat.exception.QueryOneToManyOrderWarning` rather then raising a :exc:`ValueError` in this case. @@ -289,15 +347,17 @@ def setOrder(self, order): for (pattr, attrInfo, rclass) in self._attrpath(obj): if attrInfo.relType == "ONE": - if (not attrInfo.notNullable and - pattr not in self.conditions): + if (not attrInfo.notNullable and + pattr not in self.conditions and + pattr not in self.join_specs): sl = 3 if self._init else 2 - warn(QueryNullableOrderWarning(pattr), + warn(QueryNullableOrderWarning(pattr), stacklevel=sl) elif attrInfo.relType == "MANY": - sl = 3 if self._init else 2 - warn(QueryOneToManyOrderWarning(pattr), - stacklevel=sl) + if (pattr not in self.join_specs): + sl = 3 if self._init else 2 + warn(QueryOneToManyOrderWarning(pattr), + stacklevel=sl) if rclass is None: # obj is an attribute, use it right away. @@ -428,7 +488,8 @@ def __str__(self): base = "SELECT %s FROM %s o" % (res, self.entity.BeanName) joins = "" for obj in sorted(subst.keys()): - joins += " JOIN %s" % self._dosubst(obj, subst) + js = self.join_specs.get(obj, "JOIN") + joins += " %s %s" % (js, self._dosubst(obj, subst)) if self.conditions: conds = [] for a in sorted(self.conditions.keys()): diff --git a/tests/test_06_query.py b/tests/test_06_query.py index 43ca3754..a6bd1a1a 100644 --- a/tests/test_06_query.py +++ b/tests/test_06_query.py @@ -49,8 +49,8 @@ def test_query_datafile(client): """Query a datafile by its name, dataset name, and investigation name. """ dfdata = { - 'name': "e208945.nxs", - 'dataset': "e208945", + 'name': "e208945.nxs", + 'dataset': "e208945", 'investigation': "12100409-ST" } conditions = { @@ -85,14 +85,14 @@ def test_query_datafile(client): def test_query_investigation_includes(client): """Query lots of information about one single investigation. """ - includes = { "facility", "type.facility", "investigationInstruments", - "investigationInstruments.instrument.facility", "shifts", - "keywords", "publications", "investigationUsers", - "investigationUsers.user", "investigationGroups", - "investigationGroups.grouping", "parameters", + includes = { "facility", "type.facility", "investigationInstruments", + "investigationInstruments.instrument.facility", "shifts", + "keywords", "publications", "investigationUsers", + "investigationUsers.user", "investigationGroups", + "investigationGroups.grouping", "parameters", "parameters.type.facility" } - query = Query(client, "Investigation", - conditions={"id": "= %d" % investigation.id}, + query = Query(client, "Investigation", + conditions={"id": "= %d" % investigation.id}, includes=includes) print(str(query)) res = client.search(query) @@ -111,10 +111,10 @@ def test_query_investigation_includes(client): def test_query_instruments(client): """Query the instruments related to a given investigation. """ - query = Query(client, "Instrument", - order=["name"], + query = Query(client, "Instrument", + order=["name"], conditions={ "investigationInstruments.investigation.id": - "= %d" % investigation.id }, + "= %d" % investigation.id }, includes={"facility", "instrumentScientists.user"}) print(str(query)) res = client.search(query) @@ -127,10 +127,10 @@ def test_query_instruments(client): def test_query_datafile_by_investigation(client): """The datafiles related to a given investigation in natural order. """ - query = Query(client, "Datafile", order=True, + query = Query(client, "Datafile", order=True, conditions={ "dataset.investigation.id": - "= %d" % investigation.id }, - includes={"dataset", "datafileFormat.facility", + "= %d" % investigation.id }, + includes={"dataset", "datafileFormat.facility", "parameters.type.facility"}) print(str(query)) res = client.search(query) @@ -159,7 +159,7 @@ def test_query_datafiles_datafileformat(client, recwarn): Note: this raises a QueryNullableOrderWarning, see below. """ recwarn.clear() - query = Query(client, "Datafile", + query = Query(client, "Datafile", order=['datafileFormat', 'dataset', 'name']) w = recwarn.pop(icat.QueryNullableOrderWarning) assert issubclass(w.category, icat.QueryNullableOrderWarning) @@ -175,31 +175,31 @@ def test_query_order_direction(client): This has been added in Issue #48. """ # Try without specifying the ordering direction first: - query = Query(client, "Datafile", - order=["name"], + query = Query(client, "Datafile", + order=["name"], conditions={ "dataset.investigation.id": "= %d" % investigation.id }) print(str(query)) res = client.search(query) assert len(res) == 4 # Ascending order is the default, so we should get the same result: - query = Query(client, "Datafile", - order=[("name", "ASC")], + query = Query(client, "Datafile", + order=[("name", "ASC")], conditions={ "dataset.investigation.id": "= %d" % investigation.id }) print(str(query)) assert client.search(query) == res # Descending order should give the reverse result: - query = Query(client, "Datafile", - order=[("name", "DESC")], + query = Query(client, "Datafile", + order=[("name", "DESC")], conditions={ "dataset.investigation.id": "= %d" % investigation.id }) print(str(query)) assert list(reversed(client.search(query))) == res # We may even combine different ordering directions on multiple # attributes of the same query: - query = Query(client, "Datafile", - order=[("dataset.name", "DESC"), ("name", "ASC")], + query = Query(client, "Datafile", + order=[("dataset.name", "DESC"), ("name", "ASC")], conditions={ "dataset.investigation.id": "= %d" % investigation.id }) print(str(query)) @@ -266,7 +266,7 @@ def test_query_in_operator(client): """Using "id in (i)" rather then "id = i" also works. (This may be needed to work around ICAT Issue 128.) """ - query = Query(client, "Investigation", + query = Query(client, "Investigation", conditions={"id": "in (%d)" % investigation.id}) print(str(query)) res = client.search(query) @@ -294,8 +294,11 @@ def test_query_rule_order(client): res = client.search(query) assert len(res) == 104 -def test_query_nullable_warning(client, recwarn): - """Ordering on nullable relations emits a warning. +def test_query_rule_order_group(client, recwarn): + """Ordering rule on grouping implicitely adds a "grouping IS NOT NULL" + condition, because it is internally implemented using an INNER + JOIN between the tables. The Query class emits a warning about + this. """ recwarn.clear() query = Query(client, "Rule", order=['grouping', 'what', 'id']) @@ -306,17 +309,42 @@ def test_query_nullable_warning(client, recwarn): res = client.search(query) assert len(res) == 44 -def test_query_nullable_warning_suppressed(client, recwarn): +def test_query_rule_order_group_suppress_warn_cond(client, recwarn): """The warning can be suppressed by making the condition explicit. """ recwarn.clear() - query = Query(client, "Rule", order=['grouping', 'what', 'id'], - conditions={"grouping":"IS NOT NULL"}) + query = Query(client, "Rule", order=['grouping', 'what', 'id'], + conditions={"grouping": "IS NOT NULL"}) assert len(recwarn.list) == 0 print(str(query)) res = client.search(query) assert len(res) == 44 +def test_query_rule_order_group_suppress_warn_join(client, recwarn): + """Another option to suppress the warning is to override the JOIN spec. + By confirming the default INNER JOIN, we get the Rules having + grouping NOT NULL. + """ + recwarn.clear() + query = Query(client, "Rule", order=['grouping', 'what', 'id'], + join_specs={"grouping": "INNER JOIN"}) + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 44 + +def test_query_rule_order_group_left_join(client, recwarn): + """Another option to suppress the warning is to override the JOIN spec. + By chosing a LEFT JOIN, we get all Rules. + """ + recwarn.clear() + query = Query(client, "Rule", order=['grouping', 'what', 'id'], + join_specs={"grouping": "LEFT OUTER JOIN"}) + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 104 + def test_query_order_one_to_many(client, recwarn): """Sort on a related object in a one yo many relation. This has been enabled in #84, but a warning is still emitted. @@ -331,6 +359,79 @@ def test_query_order_one_to_many(client, recwarn): res = client.search(query) assert len(res) == 3 +def test_query_order_one_to_many_warning_suppressed(client, recwarn): + """Again, the warning can be suppressed by overriding the JOIN spec. + """ + recwarn.clear() + query = Query(client, "Investigation", + order=['investigationInstruments.instrument.fullName'], + join_specs={"investigationInstruments": "INNER JOIN"}) + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 3 + +def test_query_order_one_to_many_duplicate(client, recwarn): + """Note that sorting on a one yo many relation may have surprising + effects on the result list. That is why class Query emits a + warning. + You may get duplicates in the result. + """ + recwarn.clear() + # The query without ORDER BY clause. + query = Query(client, "Investigation") + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 3 + reference = res + # The same query adding a ORDER BY clause, we get two duplicates in + # the result. + recwarn.clear() + query = Query(client, "Investigation", order=['investigationUsers.role']) + w = recwarn.pop(icat.QueryOneToManyOrderWarning) + assert issubclass(w.category, icat.QueryOneToManyOrderWarning) + assert "investigationUsers" in str(w.message) + print(str(query)) + res = client.search(query) + assert len(res) == 5 + assert set(res) == set(reference) + +def test_query_order_one_to_many_missing(client, recwarn): + """Note that sorting on a one yo many relation may have surprising + effects on the result list. That is why class Query emits a + warning. + You may get misses in the result. + """ + recwarn.clear() + # The query without ORDER BY clause. + query = Query(client, "Sample") + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 3 + reference = res + # The same query adding a ORDER BY clause, one item, a sample + # having no parameter with a string value, is missing from the result. + recwarn.clear() + query = Query(client, "Sample", order=['parameters.stringValue']) + w = recwarn.pop(icat.QueryOneToManyOrderWarning) + assert issubclass(w.category, icat.QueryOneToManyOrderWarning) + assert "parameters" in str(w.message) + print(str(query)) + res = client.search(query) + assert len(res) == 2 + # We can fix it in this case using a LEFT JOIN. + recwarn.clear() + query = Query(client, "Sample", + order=['parameters.stringValue'], + join_specs={"parameters": "LEFT JOIN"}) + assert len(recwarn.list) == 0 + print(str(query)) + res = client.search(query) + assert len(res) == 3 + assert set(res) == set(reference) + def test_query_order_suppress_warnings(client, recwarn): """Suppress all QueryWarnings. """ @@ -347,7 +448,7 @@ def test_query_order_suppress_warnings(client, recwarn): def test_query_limit(client): """Add a LIMIT clause to the last example. """ - query = Query(client, "Rule", order=['grouping', 'what', 'id'], + query = Query(client, "Rule", order=['grouping', 'what', 'id'], conditions={"grouping":"IS NOT NULL"}) query.setLimit( (0,10) ) print(str(query)) @@ -357,7 +458,7 @@ def test_query_limit(client): def test_query_limit_placeholder(client): """LIMIT clauses are particular useful with placeholders. """ - query = Query(client, "Rule", order=['grouping', 'what', 'id'], + query = Query(client, "Rule", order=['grouping', 'what', 'id'], conditions={"grouping":"IS NOT NULL"}) query.setLimit( ("%d","%d") ) print(str(query)) @@ -377,7 +478,7 @@ def test_query_non_ascii(client): # String literal with unicode characters that will be understood # by both Python 2 and Python 3. fullName = b'Rudolph Beck-D\xc3\xbclmen'.decode('utf8') - query = Query(client, "User", + query = Query(client, "User", conditions={ "fullName": "= '%s'" % fullName }) if sys.version_info < (3, 0): print(unicode(query)) @@ -397,10 +498,10 @@ def test_query_str(client): still a bug because a __str__() operator should not have any side effects at all. It was fixed in changes 4688517 and 905dd8c. """ - query = Query(client, "Datafile", order=True, + query = Query(client, "Datafile", order=True, conditions={ "dataset.investigation.id": - "= %d" % investigation.id }, - includes={"dataset", "datafileFormat.facility", + "= %d" % investigation.id }, + includes={"dataset", "datafileFormat.facility", "parameters.type.facility"}) r = repr(query) print(str(query)) @@ -526,7 +627,7 @@ def test_query_mulitple_attributes_distinct(client): """Combine DISTINCT with a query for multiple attributes. This requires a special handling due to some quirks in the - icat.server query poarser. Support for this has been added in + icat.server query parser. Support for this has been added in #81. """ if not client._has_wsdl_type('fieldSet'): @@ -582,7 +683,7 @@ def test_query_aggregate_distinct_attribute(client): Issue #32. """ require_icat_version("4.7.0", "SELECT DISTINCT in queries") - query = Query(client, "Datafile", + query = Query(client, "Datafile", attributes="datafileFormat.name", conditions={ "dataset.investigation.id": "= %d" % investigation.id }) @@ -602,7 +703,7 @@ def test_query_aggregate_distinct_related_obj(client): Issue #32. """ require_icat_version("4.7.0", "SELECT DISTINCT in queries") - query = Query(client, "Datafile", + query = Query(client, "Datafile", attributes="datafileFormat", conditions={ "dataset.investigation.id": "= %d" % investigation.id })