diff --git a/CHANGES.rst b/CHANGES.rst index 2e00ac80..37cfa8b9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,32 @@ Changelog ========= +0.20.0 (not yet released) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +New features +------------ + ++ `#86`_, `#89`_: allow SQL functions to be used on the attributes in + the conditions and order keys in class :class:`icat.query.Query`. + +Incompatible changes and new bugs +--------------------------------- + ++ `#94`_: the implementation of `#89`_ changed the internal data + structures in :attr:`icat.query.Query.conditions` and + :attr:`icat.query.Query.order`. These attributes are considered + internal and deliberately not documented, so one could argue that + this is not an incompatible change, though. But the changes also + have an impact on the return value of + :meth:`icat.query.Query.__repr__` such that it is not suitable to + recreate the query object. + +.. _#86: https://github.com/icatproject/python-icat/issues/86 +.. _#89: https://github.com/icatproject/python-icat/pull/89 +.. _#94: https://github.com/icatproject/python-icat/issues/94 + + 0.19.0 (2021-07-20) ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/src/tutorial-search.rst b/doc/src/tutorial-search.rst index f0b410ce..ad7b0386 100644 --- a/doc/src/tutorial-search.rst +++ b/doc/src/tutorial-search.rst @@ -183,6 +183,40 @@ We may also include related objects in the search results:: visitId = "1.1-N" }] +python-icat supports the use of some JPQL functions when specifying +which attribute a condition should be applied to.
Consider the +following query:: + + >>> query = Query(client, "Investigation", conditions={"LENGTH(title)": "= 18"}) + >>> print(query) + SELECT o FROM Investigation o WHERE LENGTH(o.title) = 18 + >>> client.search(query) + [(investigation){ + createId = "simple/root" + createTime = 2021-10-05 14:09:57+00:00 + id = 430 + modId = "simple/root" + modTime = 2021-10-05 14:09:57+00:00 + doi = "00.0815/inv-00601" + endDate = 2010-10-12 15:00:00+00:00 + name = "10100601-ST" + startDate = 2010-09-30 10:27:24+00:00 + title = "Ni-Mn-Ga flat cone" + visitId = "1.1-N" + }, (investigation){ + createId = "simple/root" + createTime = 2021-10-05 14:09:58+00:00 + id = 431 + modId = "simple/root" + modTime = 2021-10-05 14:09:58+00:00 + doi = "00.0815/inv-00409" + endDate = 2012-08-06 01:10:08+00:00 + name = "12100409-ST" + startDate = 2012-07-26 15:44:24+00:00 + title = "NiO SC OF1 JUH HHL" + visitId = "1.1-P" + }] + The conditions in a query may also be put on the attributes of related objects. This allows rather complex queries. Let us search for the datasets in this investigation that have been measured in a magnetic @@ -668,6 +702,27 @@ dataset parameter, ordered by parameter type name (ascending), units } }] +In a similar way as for `conditions`, we may use JPQL functions also +in the `order` argument to :class:`~icat.query.Query`. Let's search +for users sorted by the length of their names, from longest to +shortest:: + + >>> query = Query(client, "User", conditions={"fullName": "IS NOT NULL"}, order=[("LENGTH(fullName)", "DESC")]) + >>> print(query) + SELECT o FROM User o WHERE o.fullName IS NOT NULL ORDER BY LENGTH(o.fullName) DESC + >>> for user in client.search(query): + ... print("%d: %s" % (len(user.fullName), user.fullName)) + ... + 19: Rudolph Beck-Dülmen + 19: Jean-Baptiste Botul + 16: Nicolas Bourbaki + 13: Aelius Cordus + 11: User Office + 10: Arnold Hau + 10: IDS reader + 8: John Doe + 4: Root + We may limit the number of returned items.
Search for the second to last dataset to have been finished:: diff --git a/icat/query.py b/icat/query.py index e8e84eb0..ade7f9d9 100644 --- a/icat/query.py +++ b/icat/query.py @@ -1,6 +1,8 @@ """Provide the Query class. """ +from collections import OrderedDict +import re from warnings import warn try: # Python 3.3 and newer @@ -111,6 +113,8 @@ class Query(object): add the `join_specs` argument. """ + _db_func_re = re.compile(r"(?:([A-Za-z_]+)\()?([A-Za-z.]+)(?(1)\))") + def __init__(self, client, entity, attributes=None, aggregate=None, order=None, conditions=None, includes=None, limit=None, @@ -215,6 +219,12 @@ def _dosubst(self, obj, subst, addas=True): n += " AS %s" % (subst[obj]) return n + def _split_db_functs(self, attr): + m = self._db_func_re.fullmatch(attr) + if not m: + raise ValueError("Invalid attribute '%s'" % attr) + return m.group(2,1) + def setAttributes(self, attributes): """Set the attributes that the query shall return. @@ -315,26 +325,35 @@ def setOrder(self, order): :param order: the list of the attributes used for sorting. A special value of :const:`True` may be used to indicate the natural order of the entity type. Any false value means - no ORDER BY clause. Rather then only an attribute name, - any item in the list may also be a tuple of an attribute - name and an order direction, the latter being either "ASC" - or "DESC" for ascending or descending order respectively. + no ORDER BY clause. The attribute name can be wrapped + with a JPQL function (such as "LENGTH(title)"). Rather + then only an attribute name, any item in the list may also + be a tuple of an attribute name and an order direction, + the latter being either "ASC" or "DESC" for ascending or + descending order respectively. :type order: iterable or :class:`bool` - :raise ValueError: if any attribute in `order` is not valid. + :raise ValueError: if any attribute in `order` is not valid or + if any attribute appears more than once in the resulting + ORDER BY clause. .. 
versionchanged:: 0.19.0 allow one to many relationships in `order`. Emit a :exc:`~icat.exception.QueryOneToManyOrderWarning` rather then raising a :exc:`ValueError` in this case. + .. versionchanged:: 0.20.0 + allow a JPQL function in the attribute. """ + # Note: with Python 3.7 and newer we could simplify this using + # a standard dict() rather then an OrderedDict(). + self.order = OrderedDict() + if order is True: - self.order = [ (a, None) - for a in self.entity.getNaturalOrder(self.client) ] + for a in self.entity.getNaturalOrder(self.client): + self.order[a] = "%s" elif order: - self.order = [] for obj in order: if isinstance(obj, tuple): @@ -344,8 +363,9 @@ def setOrder(self, order): % direction) else: direction = None + attr, jpql_func = self._split_db_functs(obj) - for (pattr, attrInfo, rclass) in self._attrpath(obj): + for (pattr, attrInfo, rclass) in self._attrpath(attr): if attrInfo.relType == "ONE": if (not attrInfo.notNullable and pattr not in self.conditions and @@ -359,19 +379,33 @@ def setOrder(self, order): warn(QueryOneToManyOrderWarning(pattr), stacklevel=sl) + if jpql_func: + if rclass is not None: + raise ValueError("Cannot apply a JPQL function " + "to a related object: %s" % obj) + if direction: + vstr = "%s(%%s) %s" % (jpql_func, direction) + else: + vstr = "%s(%%s)" % jpql_func + else: + if direction: + vstr = "%%s %s" % direction + else: + vstr = "%s" if rclass is None: - # obj is an attribute, use it right away. - self.order.append( (obj, direction) ) + # attr is an attribute, use it right away. + if attr in self.order: + raise ValueError("Cannot add %s more than once" % attr) + self.order[attr] = vstr else: - # obj is a related object, use the natural order + # attr is a related object, use the natural order # of its class. 
- rorder = rclass.getNaturalOrder(self.client) - self.order.extend([ ("%s.%s" % (obj, ra), direction) - for ra in rorder ]) - - else: - - self.order = [] + for ra in rclass.getNaturalOrder(self.client): + rattr = "%s.%s" % (attr, ra) + if rattr in self.order: + raise ValueError("Cannot add %s more than once" + % rattr) + self.order[rattr] = vstr def addConditions(self, conditions): """Add conditions to the constraints to build the WHERE clause from. @@ -380,30 +414,38 @@ def addConditions(self, conditions): result. This must be a mapping of attribute names to conditions on that attribute. The latter may either be a string with a single condition or a list of strings to add - more then one condition on a single attribute. If the - query already has a condition on a given attribute, it - will be turned into a list with the new condition(s) - appended. + more then one condition on a single attribute. The + attribute name (the key of the condition) can be wrapped + with a JPQL function (such as "UPPER(title)"). If the + query already has a condition on a given attribute, the + previous condition(s) will be retained and the new + condition(s) added to that. :type conditions: :class:`dict` :raise ValueError: if any key in `conditions` is not valid. + + .. versionchanged:: 0.20.0 + allow a JPQL function in the attribute. 
""" + def _cond_value(rhs, func): + rhs = rhs.replace('%', '%%') + if func: + return "%s(%%s) %s" % (func, rhs) + else: + return "%%s %s" % (rhs) if conditions: - for a in conditions.keys(): + for k in conditions.keys(): + if isinstance(conditions[k], basestring): + conds = [conditions[k]] + else: + conds = conditions[k] + a, jpql_func = self._split_db_functs(k) for (pattr, attrInfo, rclass) in self._attrpath(a): pass + v = [ _cond_value(rhs, jpql_func) for rhs in conds ] if a in self.conditions: - conds = [] - if isinstance(self.conditions[a], basestring): - conds.append(self.conditions[a]) - else: - conds.extend(self.conditions[a]) - if isinstance(conditions[a], basestring): - conds.append(conditions[a]) - else: - conds.extend(conditions[a]) - self.conditions[a] = conds + self.conditions[a].extend(v) else: - self.conditions[a] = conditions[a] + self.conditions[a] = v def addIncludes(self, includes): """Add related objects to build the INCLUDE clause from. @@ -462,7 +504,7 @@ def __str__(self): usefulness over formal correctness. For Python 3, there is no distinction between Unicode and string objects anyway. 
""" - joinattrs = ( { a for a, d in self.order } | + joinattrs = ( set(self.order.keys()) | set(self.conditions.keys()) | set(self.attributes) ) subst = self._makesubst(joinattrs) @@ -494,23 +536,15 @@ def __str__(self): conds = [] for a in sorted(self.conditions.keys()): attr = self._dosubst(a, subst, False) - cond = self.conditions[a] - if isinstance(cond, basestring): - conds.append("%s %s" % (attr, cond)) - else: - for c in cond: - conds.append("%s %s" % (attr, c)) + for c in self.conditions[a]: + conds.append(c % attr) where = " WHERE " + " AND ".join(conds) else: where = "" if self.order: orders = [] - for a, d in self.order: - a = self._dosubst(a, subst, False) - if d: - orders.append("%s %s" % (a, d)) - else: - orders.append(a) + for a in self.order.keys(): + orders.append(self.order[a] % self._dosubst(a, subst, False)) order = " ORDER BY " + ", ".join(orders) else: order = "" @@ -535,7 +569,9 @@ def copy(self): q.attributes = list(self.attributes) q.aggregate = self.aggregate q.order = list(self.order) - q.conditions = self.conditions.copy() + q.conditions = dict() + for k, v in self.conditions.items(): + q.conditions[k] = self.conditions[k].copy() q.includes = self.includes.copy() q.limit = self.limit return q diff --git a/tests/test_06_query.py b/tests/test_06_query.py index f41d0292..f6abd158 100644 --- a/tests/test_06_query.py +++ b/tests/test_06_query.py @@ -150,7 +150,7 @@ def test_query_datacollection(client): """ query = Query(client, "DataCollection", order=True) print(str(query)) - assert ("id", None) in query.order + assert "id" in query.order res = client.search(query) assert len(res) == 2 @@ -285,12 +285,63 @@ def test_query_condition_obj(client): res = client.search(query) assert len(res) == 60 +def test_query_condition_jpql_function(client): + """Functions may be applied to field names of conditions. + This test also applies `UPPER()` on the data to mitigate instances + of Oracle databases which are case sensitive. 
+ """ + conditions = { + "UPPER(title)": "like UPPER('%Ni-Mn-Ga flat cone%')", + "UPPER(datasets.name)": "like UPPER('%e208341%')", + } + query = Query(client, "Investigation", conditions=conditions) + print(str(query)) + res = client.search(query) + assert len(res) == 1 + +def test_query_condition_jpql_function_namelen(client): + """Functions may be applied to field names of conditions. + Similar to last test, but picking another example where the effect + of the JPQL function in the condition is easier to verify in the + result. + """ + conditions = { "LENGTH(fullName)": "> 11" } + query = Query(client, "User", conditions=conditions) + print(str(query)) + res = client.search(query) + assert len(res) == 4 + +def test_query_condition_jpql_function_mixed(client): + """Mix conditions with and without JPQL function on the same attribute. + This test case failed for an early implementation of JPQL + functions, see discussion in #89. + """ + conditions = { "LENGTH(fullName)": "> 11", "fullName": "> 'C'" } + query = Query(client, "User", conditions=conditions) + print(str(query)) + res = client.search(query) + assert len(res) == 3 + +def test_query_order_jpql_function(client): + """Functions may be applied to attribute names in order. + + As an example, search for the User having the third longest + fullName. (In the example data, the longest and second longest + fullName is somewhat ambiguous due to character encoding issues.) + """ + query = Query(client, "User", + order=[("LENGTH(fullName)", "DESC")], limit=(2,1)) + print(str(query)) + res = client.search(query) + assert len(res) == 1 + assert res[0].fullName == "Nicolas Bourbaki" + def test_query_rule_order(client): """Rule does not have a constraint, id is included in the natural order. """ query = Query(client, "Rule", order=True) print(str(query)) - assert ("id", None) in query.order + assert "id" in query.order res = client.search(query) assert len(res) == 104