Skip to content

Commit 677ae18

Browse files
authored
Support generic sorting (#3373)
* Support generic sorting PBENCH-1126 With pagination, a client (e.g., the dashboard) can't rely on client-side column sorting. Instead, add generalized sorting to `GET /datasets`, allowing the returned datasets to be sorted by any column or metadata value, either ascending (default) or descending. `GET /api/v1/datasets?sort=user.dashboard.favorite:desc,dataset.uploaded` will return all accessible datasets, sorted first by whether the authenticated user has marked the dataset "favorite" and second by the upload timestamp. (All "favorited" datasets will appear first, in upload order, followed by all "non-favorited" datasets in upload order.)
1 parent 6a5b263 commit 677ae18

File tree

4 files changed

+264
-52
lines changed

4 files changed

+264
-52
lines changed

docs/API/V1/list.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ can only list datasets with access `public`.)
66

77
The collection of datasets may be filtered using any combination of a number
88
of query parameters, including `owner`, `access`, `name` substring, date range,
9-
and arbitrary metadata filter expressions.
9+
and arbitrary metadata filter expressions. The selected datasets may be sorted
10+
by any metadata key value in either ascending or descending order. Multiple
11+
sort parameters will be processed in order.
1012

1113
Large collections can be paginated for efficiency using the `limit` and `offset`
1214
query parameters.
@@ -113,6 +115,24 @@ with a paginated display or to limit data transfer requirements.
113115
Select only datasets owned by the specified username. Unless the username
114116
matches the authenticated user, only "public" datasets can be selected.
115117

118+
`sort` sort expression \
119+
Sort the returned datasets by one or more sort expressions. You can separate
120+
multiple expressions with commas, or across separate `sort` query
121+
parameters, which will be processed in order. Any Metadata namespace key can
122+
be specified.
123+
124+
Specify a sort order using the keywords `asc` (ascending) or `desc`
125+
(descending), separated from the key name with a colon (`:`). For example,
126+
`dataset.name:asc` or `dataset.metalog.pbench.script:desc`. The default is
127+
"ascending" if no order is specified. If no sort expressions are specified,
128+
datasets are returned sorted by `dataset.resource_id`.
129+
130+
For example, `GET /api/v1/datasets?sort=global.dashboard.seen:desc,dataset.name`
131+
will return selected datasets sorted first in descending order based on whether
132+
the dataset has been marked "seen" by the dashboard, and secondly sorted by the
133+
dataset name. The Pbench Dashboard stores `global.dashboard.seen` as a `boolean`
134+
value, so in this case `true` values will appear before `false` values.
135+
116136
`start` date/time \
117137
Select only datasets created on or after the specified time. Time should be
118138
specified in ISO standard format, as `YYYY-MM-DDThh:mm:ss.ffffff[+|-]HH:MM`.

lib/pbench/server/api/resources/datasets_list.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from flask import current_app
66
from flask.json import jsonify
77
from flask.wrappers import Request, Response
8-
from sqlalchemy import and_, cast, func, or_, String
8+
from sqlalchemy import and_, asc, cast, desc, func, or_, String
99
from sqlalchemy.exc import ProgrammingError, StatementError
1010
from sqlalchemy.orm import aliased, Query
1111
from sqlalchemy.sql.expression import Alias
@@ -80,6 +80,12 @@ def __init__(self, config: PbenchServerConfig):
8080
string_list=",",
8181
metalog_ok=True,
8282
),
83+
Parameter(
84+
"sort",
85+
ParamType.LIST,
86+
element_type=ParamType.STRING,
87+
string_list=",",
88+
),
8389
),
8490
authorization=ApiAuthorizationType.USER_ACCESS,
8591
),
@@ -105,7 +111,7 @@ def get_paginated_obj(
105111
start to narrow down the result.
106112
"""
107113
paginated_result = {}
108-
query = query.order_by(Dataset.resource_id).distinct()
114+
query = query.distinct()
109115
total_count = query.count()
110116

111117
# Shift the query search by user specified offset value,
@@ -222,7 +228,7 @@ def filter_query(
222228
k, v = kw.split(":", maxsplit=1)
223229
except ValueError:
224230
raise APIAbort(
225-
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k=v'"
231+
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k:v'"
226232
)
227233
if k.startswith("^"):
228234
combine_or = True
@@ -372,20 +378,71 @@ def daterange(self, query: Query) -> JSONOBJECT:
372378
else:
373379
return {}
374380

375-
def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT:
381+
def datasets(
382+
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
383+
) -> JSONOBJECT:
376384
"""Gather and paginate the selected datasets
377385
378386
Run the query we've compiled, with pagination limits applied; collect
379387
results into a list of JSON objects including selected metadata keys.
380388
381389
Args:
382390
request: The HTTP Request object
391+
aliases: Map of join column aliases for each Metadata namespace
383392
json: The JSON query parameters
384393
query: The basic filtered SQLAlchemy query object
385394
386395
Returns:
387396
The paginated dataset listing
388397
"""
398+
399+
# Process a possible list of sort terms. By default, we sort by the
400+
# dataset resource_id.
401+
sorters = []
402+
for sort in json.get("sort", ["dataset.resource_id"]):
403+
if ":" not in sort:
404+
k = sort
405+
order = asc
406+
else:
407+
k, o = sort.split(":", maxsplit=1)
408+
if o.lower() == "asc":
409+
order = asc
410+
elif o.lower() == "desc":
411+
order = desc
412+
else:
413+
raise APIAbort(
414+
HTTPStatus.BAD_REQUEST,
415+
f"The sort order {o!r} for key {k!r} must be 'asc' or 'desc'",
416+
)
417+
418+
if not Metadata.is_key_path(k, Metadata.METADATA_KEYS, metalog_key_ok=True):
419+
raise APIAbort(HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k)))
420+
keys = k.split(".")
421+
native_key = keys.pop(0).lower()
422+
sorter = None
423+
if native_key == Metadata.DATASET:
424+
second = keys[0].lower()
425+
# The dataset namespace requires special handling because
426+
# "dataset.metalog" is really a special native key space
427+
# named "metalog", while other "dataset" sub-keys are primary
428+
# columns in the Dataset table.
429+
if second == Metadata.METALOG:
430+
native_key = keys.pop(0).lower()
431+
else:
432+
try:
433+
c = getattr(Dataset, second)
434+
except AttributeError as e:
435+
raise APIAbort(
436+
HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k))
437+
) from e
438+
sorter = order(c)
439+
if sorter is None:
440+
sorter = order(aliases[native_key].value[keys])
441+
sorters.append(sorter)
442+
443+
# Apply our list of sort terms
444+
query = query.order_by(*sorters)
445+
389446
try:
390447
datasets, paginated_result = self.get_paginated_obj(
391448
query=query, json=json, url=request.url
@@ -534,5 +591,5 @@ def _get(
534591
result.update(self.daterange(query))
535592
done = True
536593
if not done:
537-
result = self.datasets(request, json, query)
594+
result = self.datasets(request, aliases, json, query)
538595
return jsonify(result)

lib/pbench/test/unit/server/conftest.py

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,10 @@ def more_datasets(
386386
test 20 private 1970-01-01:00:42
387387
fio_1 3 public 1978-06-26:08:00
388388
fio_2 20 public 2022-01-01:00:00
389-
uperf_1 20 private 1978-06-26:08:00
390-
uperf_2 20 private 1978-06-26:08:00
391-
uperf_3 20 private 1978-06-26:08:00
392-
uperf_4 20 private 1978-06-26:08:00
389+
uperf_1 20 private 1978-06-26:08:01
390+
uperf_2 20 private 1978-06-26:09:00
391+
uperf_3 20 private 1978-06-26:09:30
392+
uperf_4 20 private 1978-06-26:10:00
393393
394394
Args:
395395
client: Provide a Flask API client
@@ -399,44 +399,48 @@ def more_datasets(
399399
attach_dataset: Provide some datasets
400400
create_user: Create the "test" user
401401
"""
402-
with freeze_time("1978-06-26 08:00:00"):
403-
Dataset(
404-
owner=create_drb_user,
405-
name="fio_1",
406-
access="public",
407-
resource_id="random_md5_string3",
408-
).add()
409-
Dataset(
410-
owner=create_user,
411-
uploaded=datetime.datetime(2022, 1, 1),
412-
name="fio_2",
413-
access="public",
414-
resource_id="random_md5_string4",
415-
).add()
416-
Dataset(
417-
owner=create_user,
418-
name="uperf_1",
419-
access="private",
420-
resource_id="random_md5_string5",
421-
).add()
422-
Dataset(
423-
owner=create_user,
424-
name="uperf_2",
425-
access="private",
426-
resource_id="random_md5_string6",
427-
).add()
428-
Dataset(
429-
owner=create_user,
430-
name="uperf_3",
431-
access="private",
432-
resource_id="random_md5_string7",
433-
).add()
434-
Dataset(
435-
owner=create_user,
436-
name="uperf_4",
437-
access="private",
438-
resource_id="random_md5_string8",
439-
).add()
402+
Dataset(
403+
owner=create_drb_user,
404+
uploaded=datetime.datetime(1978, 6, 26, 8, 0, 0, 0),
405+
name="fio_1",
406+
access="public",
407+
resource_id="random_md5_string3",
408+
).add()
409+
Dataset(
410+
owner=create_user,
411+
uploaded=datetime.datetime(2022, 1, 1),
412+
name="fio_2",
413+
access="public",
414+
resource_id="random_md5_string4",
415+
).add()
416+
Dataset(
417+
owner=create_user,
418+
uploaded=datetime.datetime(1978, 6, 26, 8, 1, 0, 0),
419+
name="uperf_1",
420+
access="private",
421+
resource_id="random_md5_string5",
422+
).add()
423+
Dataset(
424+
owner=create_user,
425+
uploaded=datetime.datetime(1978, 6, 26, 9, 0, 0, 0),
426+
name="uperf_2",
427+
access="private",
428+
resource_id="random_md5_string6",
429+
).add()
430+
Dataset(
431+
owner=create_user,
432+
uploaded=datetime.datetime(1978, 6, 26, 9, 30, 0, 0),
433+
name="uperf_3",
434+
access="private",
435+
resource_id="random_md5_string7",
436+
).add()
437+
Dataset(
438+
owner=create_user,
439+
uploaded=datetime.datetime(1978, 6, 26, 10, 0, 0, 0),
440+
name="uperf_4",
441+
access="private",
442+
resource_id="random_md5_string8",
443+
).add()
440444

441445

442446
@pytest.fixture()

0 commit comments

Comments (0)