Skip to content

Commit 428a05a

Browse files
committed
Return "raw" API parameters in pagination
PBENCH-1133 The `GET /datasets` response is optimized for sequential pagination, providing a convenient "next_url" string that can be used directly. However, if a client wants to support "random access" pagination, it must parse the URL string in order to modify the `offset` parameter. This change attempts to make that a bit easier by supplementing the current response payload with a `parameters` field containing the query parameters as a JSON object, making it easy to update the `offset` parameter. (Making the unit tests work against the normalized parameter list proved a bit challenging, and I ended up saving the original "raw" client parameters in the API `context` so they can be used directly.)
1 parent 87822ef commit 428a05a

File tree

3 files changed

+68
-27
lines changed

3 files changed

+68
-27
lines changed

lib/pbench/server/api/resources/__init__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,11 +1700,8 @@ def _dispatch(
17001700
try:
17011701
if schema.query_schema:
17021702
query_params = self._gather_query_params(request, schema.query_schema)
1703-
1704-
params = self.schemas.validate(
1705-
method,
1706-
ApiParams(body=body_params, query=query_params, uri=uri_params),
1707-
)
1703+
raw_params = ApiParams(body=body_params, query=query_params, uri=uri_params)
1704+
params = self.schemas.validate(method, raw_params)
17081705
except APIInternalError as e:
17091706
current_app.logger.exception("{} {}", api_name, e.details)
17101707
abort(e.http_status, message=str(e))
@@ -1772,7 +1769,11 @@ def _dispatch(
17721769
"attributes": None,
17731770
}
17741771

1775-
context = {"auditing": auditing, "attributes": schema.attributes}
1772+
context = {
1773+
"auditing": auditing,
1774+
"attributes": schema.attributes,
1775+
"raw_params": raw_params,
1776+
}
17761777
try:
17771778
response = execute(params, request, context)
17781779
except APIInternalError as e:

lib/pbench/server/api/resources/datasets_list.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def __init__(self, config: PbenchServerConfig):
296296
)
297297

298298
def get_paginated_obj(
299-
self, query: Query, json: JSON, url: str
299+
self, query: Query, json: JSON, raw_params: ApiParams, url: str
300300
) -> tuple[list[JSONOBJECT], dict[str, str]]:
301301
"""Helper function to return a slice of datasets (constructed according
302302
to the user specified limit and an offset number) and a paginated object
@@ -309,10 +309,15 @@ def get_paginated_obj(
309309
"limit": 10 -> dataset[0: 10]
310310
"offset": 20 -> dataset[20: total_items_count]
311311
312-
TODO: We may need to optimize the pagination
313-
e.g Use of unique pointers to record the last returned row and then
314-
use this pointer in subsequent page request instead of an initial
315-
start to narrow down the result.
312+
Args:
313+
query: A SQLAlchemy query object
314+
json: The query parameters in normalized JSON form
315+
raw_params: The original API parameters for reference
316+
url: The API URL
317+
318+
Returns:
319+
The list of Dataset objects matched by the query and a pagination
320+
framework object.
316321
"""
317322
paginated_result = {}
318323
query = query.distinct()
@@ -333,14 +338,17 @@ def get_paginated_obj(
333338
Database.dump_query(query, current_app.logger)
334339

335340
items = query.all()
341+
raw = raw_params.query.copy()
336342
next_offset = offset + len(items)
337343
if next_offset < total_count:
338-
json["offset"] = next_offset
344+
json["offset"] = str(next_offset)
345+
raw["offset"] = str(next_offset)
339346
parsed_url = urlparse(url)
340347
next_url = parsed_url._replace(query=urlencode_json(json)).geturl()
341348
else:
342349
next_url = ""
343350

351+
paginated_result["parameters"] = raw
344352
paginated_result["next_url"] = next_url
345353
paginated_result["total"] = total_count
346354
return items, paginated_result
@@ -600,7 +608,12 @@ def daterange(self, query: Query) -> JSONOBJECT:
600608
return {}
601609

602610
def datasets(
603-
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
611+
self,
612+
request: Request,
613+
aliases: dict[str, Any],
614+
json: JSONOBJECT,
615+
raw_params: ApiParams,
616+
query: Query,
604617
) -> JSONOBJECT:
605618
"""Gather and paginate the selected datasets
606619
@@ -611,6 +624,7 @@ def datasets(
611624
request: The HTTP Request object
612625
aliases: Map of join column aliases for each Metadata namespace
613626
json: The JSON query parameters
627+
raw_params: The original API parameters (used for pagination)
614628
query: The basic filtered SQLAlchemy query object
615629
616630
Returns:
@@ -666,7 +680,7 @@ def datasets(
666680

667681
try:
668682
datasets, paginated_result = self.get_paginated_obj(
669-
query=query, json=json, url=request.url
683+
query=query, json=json, raw_params=raw_params, url=request.url
670684
)
671685
except (AttributeError, ProgrammingError, StatementError) as e:
672686
raise APIInternalError(
@@ -812,5 +826,5 @@ def _get(
812826
result.update(self.daterange(query))
813827
done = True
814828
if not done:
815-
result = self.datasets(request, aliases, json, query)
829+
result = self.datasets(request, aliases, json, context["raw_params"], query)
816830
return jsonify(result)

lib/pbench/test/unit/server/test_datasets_list.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
from http import HTTPStatus
33
import re
4-
from typing import Optional
4+
from typing import Any, Optional
55

66
import pytest
77
import requests
@@ -10,7 +10,7 @@
1010
from sqlalchemy.orm import aliased, Query
1111

1212
from pbench.server import JSON, JSONARRAY, JSONOBJECT
13-
from pbench.server.api.resources import APIAbort
13+
from pbench.server.api.resources import APIAbort, ApiParams
1414
from pbench.server.api.resources.datasets_list import DatasetsList, urlencode_json
1515
from pbench.server.database.database import Database
1616
from pbench.server.database.models.datasets import Dataset, Metadata
@@ -129,17 +129,26 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
129129
Returns:
130130
Paginated JSON object containing list of dataset values
131131
"""
132+
133+
def convert(k: str, v: Any) -> Any:
134+
if isinstance(v, str) and k in ("filter", "sort", "metadata"):
135+
return [v]
136+
elif isinstance(v, int):
137+
return str(v)
138+
else:
139+
return v
140+
132141
results: list[JSON] = []
133-
offset = query.get("offset", 0)
142+
offset = int(query.get("offset", 0))
134143
limit = query.get("limit")
135144

136145
if limit:
137-
next_offset = offset + limit
146+
next_offset = offset + int(limit)
138147
paginated_name_list = name_list[offset:next_offset]
139148
if next_offset >= len(name_list):
140149
next_url = ""
141150
else:
142-
query["offset"] = next_offset
151+
query["offset"] = str(next_offset)
143152
next_url = (
144153
f"http://localhost{server_config.rest_uri}/datasets?"
145154
+ urlencode_json(query)
@@ -161,7 +170,15 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
161170
},
162171
}
163172
)
164-
return {"next_url": next_url, "results": results, "total": len(name_list)}
173+
q1 = {k: convert(k, v) for k, v in query.items()}
174+
if "metadata" not in q1:
175+
q1["metadata"] = ["dataset.uploaded"]
176+
return {
177+
"parameters": q1,
178+
"next_url": next_url,
179+
"results": results,
180+
"total": len(name_list),
181+
}
165182

166183
def compare_results(
167184
self, result: JSONOBJECT, name_list: list[str], query: JSON, server_config
@@ -190,8 +207,8 @@ def compare_results(
190207
(None, {}, ["fio_1", "fio_2"]),
191208
(None, {"access": "public"}, ["fio_1", "fio_2"]),
192209
("drb", {"name": "fio"}, ["fio_1", "fio_2"]),
193-
("drb", {"name": "fio", "limit": 1}, ["fio_1", "fio_2"]),
194-
("drb", {"name": "fio", "limit": 1, "offset": 2}, ["fio_1", "fio_2"]),
210+
("drb", {"name": "fio", "limit": "1"}, ["fio_1", "fio_2"]),
211+
("drb", {"name": "fio", "limit": 1, "offset": "2"}, ["fio_1", "fio_2"]),
195212
("drb", {"name": "fio", "offset": 1}, ["fio_1", "fio_2"]),
196213
("drb", {"name": "fio", "offset": 2}, ["fio_1", "fio_2"]),
197214
("drb", {"owner": "drb"}, ["drb", "fio_1"]),
@@ -311,7 +328,9 @@ def test_mine_novalue(self, server_config, client, more_datasets, get_token_func
311328
headers=headers,
312329
)
313330
assert response.status_code == HTTPStatus.OK
314-
self.compare_results(response.json, ["drb", "fio_1"], {}, server_config)
331+
self.compare_results(
332+
response.json, ["drb", "fio_1"], {"mine": ""}, server_config
333+
)
315334

316335
@pytest.mark.parametrize(
317336
"login,query,results",
@@ -336,7 +355,7 @@ def test_dataset_paged_list(self, query_as, login, query, results, server_config
336355
results: A list of the dataset names we expect to be returned
337356
server_config: The PbenchServerConfig object
338357
"""
339-
query.update({"metadata": ["dataset.uploaded"], "limit": 5})
358+
query.update({"metadata": ["dataset.uploaded"], "limit": "5"})
340359
result = query_as(query, login, HTTPStatus.OK)
341360
self.compare_results(result.json, results, query, server_config)
342361

@@ -384,6 +403,7 @@ def test_get_key_errors(self, query_as):
384403
)
385404
assert response.json == {
386405
"next_url": "",
406+
"parameters": {"metadata": ["global.test.foo"]},
387407
"results": [
388408
{
389409
"metadata": {"global.test.foo": None},
@@ -444,6 +464,12 @@ def test_use_funk_metalog_keys(self, query_as):
444464
)
445465
assert response.json == {
446466
"next_url": "",
467+
"parameters": {
468+
"filter": [
469+
"dataset.metalog.iterations/[email protected]_name:~10"
470+
],
471+
"metadata": ["dataset.metalog.iterations/fooBar=10-what_else@weird"],
472+
},
447473
"results": [
448474
{
449475
"metadata": {
@@ -725,7 +751,7 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results):
725751
"drb",
726752
HTTPStatus.OK,
727753
)
728-
self.compare_results(response.json, results, {}, server_config)
754+
self.compare_results(response.json, results, {"filter": query}, server_config)
729755

730756
@pytest.mark.parametrize(
731757
"query,message",
@@ -769,7 +795,7 @@ def test_pagination_error(self, caplog, monkeypatch, query_as, exception, error)
769795
"""
770796

771797
def do_error(
772-
self, query: Query, json: JSONOBJECT, url: str
798+
self, query: Query, json: JSONOBJECT, raw_params: ApiParams, url: str
773799
) -> tuple[JSONARRAY, JSONOBJECT]:
774800
raise exception
775801

0 commit comments

Comments (0)