Skip to content

Commit aeda786

Browse files
committed
Support generic sorting
PBENCH-1126 With pagination, a client (e.g., the dashboard) can't rely on client-side column sorting. Instead, add generalized sorting to `GET /datasets`, allowing the returned datasets to be sorted by any column or metadata value, either ascending (default) or descending. `GET /api/v1/datasets?sort=user.dashboard.favorite:desc,dataset.uploaded` will return all accessible datasets, sorted first by whether the authenticated user has marked the dataset "favorite" and second by the upload timestamp. (All "favorited" datasets will appear first, in upload order, followed by all "non-favorited" datasets in upload order.)
1 parent 82cc01b commit aeda786

File tree

4 files changed

+213
-52
lines changed

4 files changed

+213
-52
lines changed

docs/API/V1/list.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ can only list datasets with access `public`.)
66

77
The collection of datasets may be filtered using any combination of a number
88
of query parameters, including `owner`, `access`, `name` substring, date range,
9-
and arbitrary metadata filter expressions.
9+
and arbitrary metadata filter expressions. The selected datasets may be sorted
10+
by any metadata key value in either ascending or descending order. Multiple
11+
sort parameters will be processed in order.
1012

1113
Large collections can be paginated for efficiency using the `limit` and `offset`
1214
query parameters.
@@ -90,6 +92,24 @@ with a paginated display or to limit data transfer requirements.
9092
Select only datasets owned by the specified username. Unless the username
9193
matches the authenticated user, only "public" datasets can be selected.
9294

95+
`sort` sort expression \
96+
Sort the returned datasets by one or more sort expressions. You can provide
97+
multiple expressions separated by commas, or as separate `sort` query
98+
parameters; they will be processed in order. Any Metadata namespace key can
99+
be specified.
100+
101+
Specify a sort order using the keywords `asc` (ascending) or `desc`
102+
(descending), separated from the key name with a colon (`:`). For example,
103+
`dataset.name:asc` or `dataset.metalog.pbench.script:desc`. The default is
104+
"ascending" if no order is specified. If no sort expressions are specified,
105+
datasets are returned sorted by `dataset.resource_id`.
106+
107+
For example, `GET /api/v1/datasets?sort=global.dashboard.seen:desc,dataset.name`
108+
will return selected datasets sorted first in descending order based on whether
109+
the dataset has been marked "seen" by the dashboard, and secondly sorted by the
110+
dataset name. The Pbench Dashboard stores `global.dashboard.seen` as a `boolean`
111+
value, so in this case `true` values will appear before `false` values.
112+
93113
`start` date/time \
94114
Select only datasets created on or after the specified time. Time should be
95115
specified in ISO standard format, as `YYYY-MM-DDThh:mm:ss.ffffff[+|-]HH:MM`.

lib/pbench/server/api/resources/datasets_list.py

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from flask import current_app
66
from flask.json import jsonify
77
from flask.wrappers import Request, Response
8-
from sqlalchemy import and_, cast, or_, String
8+
from sqlalchemy import and_, asc, cast, desc, or_, String
99
from sqlalchemy.exc import ProgrammingError, StatementError
1010
from sqlalchemy.orm import aliased, Query
1111
from sqlalchemy.sql.expression import Alias
@@ -79,6 +79,12 @@ def __init__(self, config: PbenchServerConfig):
7979
string_list=",",
8080
metalog_ok=True,
8181
),
82+
Parameter(
83+
"sort",
84+
ParamType.LIST,
85+
element_type=ParamType.STRING,
86+
string_list=",",
87+
),
8288
),
8389
authorization=ApiAuthorizationType.USER_ACCESS,
8490
),
@@ -104,7 +110,7 @@ def get_paginated_obj(
104110
start to narrow down the result.
105111
"""
106112
paginated_result = {}
107-
query = query.order_by(Dataset.resource_id).distinct()
113+
query = query.distinct()
108114
total_count = query.count()
109115

110116
# Shift the query search by user specified offset value,
@@ -221,7 +227,7 @@ def filter_query(
221227
k, v = kw.split(":", maxsplit=1)
222228
except ValueError:
223229
raise APIAbort(
224-
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k=v'"
230+
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k:v'"
225231
)
226232
if k.startswith("^"):
227233
combine_or = True
@@ -348,20 +354,77 @@ def keyspace(self, query: Query) -> JSONOBJECT:
348354
self.accumulate(aggregate, m.key, m.value)
349355
return aggregate
350356

351-
def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT:
357+
def datasets(
358+
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
359+
) -> JSONOBJECT:
352360
"""Gather and paginate the selected datasets
353361
354362
Run the query we've compiled, with pagination limits applied; collect
355363
results into a list of JSON objects including selected metadata keys.
356364
357365
Args:
358366
request: The HTTP Request object
367+
aliases: Map of join column aliases for each Metadata namespace
359368
json: The JSON query parameters
360369
query: The basic filtered SQLAlchemy query object
361370
362371
Returns:
363372
The paginated dataset listing
364373
"""
374+
375+
# Process a possible list of sort terms. By default, we sort by the
376+
# dataset resource_id.
377+
sorters = []
378+
for sort in json.get("sort", ["dataset.resource_id"]):
379+
if ":" not in sort:
380+
k = sort
381+
order = asc
382+
else:
383+
try:
384+
k, o = sort.split(":", maxsplit=1)
385+
except ValueError:
386+
raise APIAbort(
387+
HTTPStatus.BAD_REQUEST,
388+
f"sort {sort!r} must have the form 'k[:o]'",
389+
)
390+
if o.lower() == "asc":
391+
order = asc
392+
elif o.lower() == "desc":
393+
order = desc
394+
else:
395+
raise APIAbort(
396+
HTTPStatus.BAD_REQUEST,
397+
f"sort order in {sort!r} must be 'asc' or 'desc'",
398+
)
399+
400+
if not Metadata.is_key_path(k, Metadata.METADATA_KEYS, metalog_key_ok=True):
401+
raise APIAbort(HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k)))
402+
keys = k.split(".")
403+
native_key = keys.pop(0).lower()
404+
sorter = None
405+
if native_key == Metadata.DATASET:
406+
second = keys[0].lower()
407+
# The dataset namespace requires special handling because
408+
# "dataset.metalog" is really a special native key space
409+
# named "metalog", while other "dataset" sub-keys are primary
410+
# columns in the Dataset table.
411+
if second == Metadata.METALOG:
412+
native_key = keys.pop(0).lower()
413+
else:
414+
try:
415+
c = getattr(Dataset, second)
416+
except AttributeError as e:
417+
raise APIAbort(
418+
HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k))
419+
) from e
420+
sorter = order(c)
421+
if sorter is None:
422+
sorter = order(aliases[native_key].value[keys])
423+
sorters.append(sorter)
424+
425+
# Apply our list of sort terms
426+
query = query.order_by(*sorters)
427+
365428
try:
366429
datasets, paginated_result = self.get_paginated_obj(
367430
query=query, json=json, url=request.url
@@ -498,4 +561,4 @@ def _get(
498561
if json.get("keysummary"):
499562
return jsonify(self.keyspace(query))
500563
else:
501-
return jsonify(self.datasets(request, json, query))
564+
return jsonify(self.datasets(request, aliases, json, query))

lib/pbench/test/unit/server/conftest.py

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,10 @@ def more_datasets(
386386
test 20 private 1970-01-01:00:42
387387
fio_1 3 public 1978-06-26:08:00
388388
fio_2 20 public 2022-01-01:00:00
389-
uperf_1 20 private 1978-06-26:08:00
390-
uperf_2 20 private 1978-06-26:08:00
391-
uperf_3 20 private 1978-06-26:08:00
392-
uperf_4 20 private 1978-06-26:08:00
389+
uperf_1 20 private 1978-06-26:08:01
390+
uperf_2 20 private 1978-06-26:09:00
391+
uperf_3 20 private 1978-06-26:09:30
392+
uperf_4 20 private 1978-06-26:10:00
393393
394394
Args:
395395
client: Provide a Flask API client
@@ -399,44 +399,48 @@ def more_datasets(
399399
attach_dataset: Provide some datasets
400400
create_user: Create the "test" user
401401
"""
402-
with freeze_time("1978-06-26 08:00:00"):
403-
Dataset(
404-
owner=create_drb_user,
405-
name="fio_1",
406-
access="public",
407-
resource_id="random_md5_string3",
408-
).add()
409-
Dataset(
410-
owner=create_user,
411-
uploaded=datetime.datetime(2022, 1, 1),
412-
name="fio_2",
413-
access="public",
414-
resource_id="random_md5_string4",
415-
).add()
416-
Dataset(
417-
owner=create_user,
418-
name="uperf_1",
419-
access="private",
420-
resource_id="random_md5_string5",
421-
).add()
422-
Dataset(
423-
owner=create_user,
424-
name="uperf_2",
425-
access="private",
426-
resource_id="random_md5_string6",
427-
).add()
428-
Dataset(
429-
owner=create_user,
430-
name="uperf_3",
431-
access="private",
432-
resource_id="random_md5_string7",
433-
).add()
434-
Dataset(
435-
owner=create_user,
436-
name="uperf_4",
437-
access="private",
438-
resource_id="random_md5_string8",
439-
).add()
402+
Dataset(
403+
owner=create_drb_user,
404+
uploaded=datetime.datetime(1978, 6, 26, 8, 0, 0, 0),
405+
name="fio_1",
406+
access="public",
407+
resource_id="random_md5_string3",
408+
).add()
409+
Dataset(
410+
owner=create_user,
411+
uploaded=datetime.datetime(2022, 1, 1),
412+
name="fio_2",
413+
access="public",
414+
resource_id="random_md5_string4",
415+
).add()
416+
Dataset(
417+
owner=create_user,
418+
uploaded=datetime.datetime(1978, 6, 26, 8, 1, 0, 0),
419+
name="uperf_1",
420+
access="private",
421+
resource_id="random_md5_string5",
422+
).add()
423+
Dataset(
424+
owner=create_user,
425+
uploaded=datetime.datetime(1978, 6, 26, 9, 0, 0, 0),
426+
name="uperf_2",
427+
access="private",
428+
resource_id="random_md5_string6",
429+
).add()
430+
Dataset(
431+
owner=create_user,
432+
uploaded=datetime.datetime(1978, 6, 26, 9, 30, 0, 0),
433+
name="uperf_3",
434+
access="private",
435+
resource_id="random_md5_string7",
436+
).add()
437+
Dataset(
438+
owner=create_user,
439+
uploaded=datetime.datetime(1978, 6, 26, 10, 0, 0, 0),
440+
name="uperf_4",
441+
access="private",
442+
resource_id="random_md5_string8",
443+
).add()
440444

441445

442446
@pytest.fixture()

lib/pbench/test/unit/server/test_datasets_list.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pytest
77
import requests
8-
from sqlalchemy import and_
8+
from sqlalchemy import and_, desc
99
from sqlalchemy.exc import ProgrammingError
1010
from sqlalchemy.orm import aliased, Query
1111

@@ -14,6 +14,7 @@
1414
from pbench.server.api.resources.datasets_list import DatasetsList, urlencode_json
1515
from pbench.server.database.database import Database
1616
from pbench.server.database.models.datasets import Dataset, Metadata
17+
from pbench.server.database.models.users import User
1718
from pbench.test.unit.server import DRB_USER_ID
1819

1920
FLATTEN = re.compile(r"[\n\s]+")
@@ -147,7 +148,7 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
147148
paginated_name_list = name_list[offset:]
148149
next_url = ""
149150

150-
for name in sorted(paginated_name_list):
151+
for name in paginated_name_list:
151152
dataset = Dataset.query(name=name)
152153
results.append(
153154
{
@@ -175,8 +176,8 @@ def compare_results(
175176
expected = self.get_results(name_list, query, server_config)
176177
for k, v in result.items():
177178
if k == "results":
178-
assert sorted(v, key=lambda d: d["resource_id"]) == sorted(
179-
expected[k], key=lambda d: d["resource_id"]
179+
assert (
180+
v == expected[k]
180181
), f"Actual {k}={v} doesn't match expected {expected[k]}"
181182
else:
182183
assert (
@@ -698,3 +699,76 @@ def test_key_summary(self, query_as):
698699
"origin": None,
699700
},
700701
}
702+
703+
@pytest.mark.parametrize(
704+
"sort,results",
705+
[
706+
(
707+
"dataset.name",
708+
["fio_1", "fio_2", "test", "uperf_1", "uperf_2", "uperf_3", "uperf_4"],
709+
),
710+
(
711+
"dataset.name:asc",
712+
["fio_1", "fio_2", "test", "uperf_1", "uperf_2", "uperf_3", "uperf_4"],
713+
),
714+
(
715+
"dataset.name:desc",
716+
["uperf_4", "uperf_3", "uperf_2", "uperf_1", "test", "fio_2", "fio_1"],
717+
),
718+
(
719+
"dataset.uploaded",
720+
["test", "fio_1", "uperf_1", "uperf_2", "uperf_3", "uperf_4", "fio_2"],
721+
),
722+
(
723+
"dataset.uploaded:desc",
724+
["fio_2", "uperf_4", "uperf_3", "uperf_2", "uperf_1", "fio_1", "test"],
725+
),
726+
(
727+
"dataset.metalog.run.controller",
728+
["test", "fio_1", "fio_2", "uperf_1", "uperf_2", "uperf_3", "uperf_4"],
729+
),
730+
(
731+
"global.test.sequence:desc",
732+
["fio_1", "fio_2", "test", "uperf_1", "uperf_2", "uperf_3", "uperf_4"],
733+
),
734+
(
735+
"global.test.sequence",
736+
["uperf_4", "uperf_3", "uperf_2", "uperf_1", "test", "fio_2", "fio_1"],
737+
),
738+
(
739+
"user.test.odd,global.test.sequence:desc",
740+
["fio_1", "test", "uperf_2", "uperf_4", "fio_2", "uperf_1", "uperf_3"],
741+
),
742+
(
743+
"user.test.odd:desc,dataset.name:desc",
744+
["uperf_3", "uperf_1", "fio_2", "uperf_4", "uperf_2", "test", "fio_1"],
745+
),
746+
],
747+
)
748+
def test_dataset_sort(self, server_config, query_as, sort, results):
749+
"""Test `datasets/list?sort`
750+
751+
We want a couple of consistent values sequences to play with. We can
752+
use the dataset.name and dataset.resource_id fields, but we want to
753+
cross Metadata namespaces, so add "global" and "user" keys we can
754+
order.
755+
756+
Args:
757+
server_config: The PbenchServerConfig object
758+
query_as: A fixture to provide a helper that executes the API call
759+
login: The username as which to perform a query
760+
query: A JSON representation of the query parameters (these will be
761+
automatically supplemented with a metadata request term)
762+
results: A list of the dataset names we expect to be returned
763+
"""
764+
765+
# Assign "sequence numbers" in the inverse order of name
766+
test = User.query(username="test")
767+
all = Database.db_session.query(Dataset).order_by(desc(Dataset.name)).all()
768+
for i, d in enumerate(all):
769+
odd = i & 1
770+
Metadata.setvalue(d, "global.test.sequence", i)
771+
Metadata.setvalue(d, "user.test.odd", odd, user=test)
772+
query = {"sort": sort, "metadata": ["dataset.uploaded"]}
773+
result = query_as(query, "test", HTTPStatus.OK)
774+
self.compare_results(result.json, results, query, server_config)

0 commit comments

Comments
 (0)