Skip to content

Commit 0a5a032

Browse files
authored
Return "raw" API parameters in pagination (#3412)
* Return "raw" API parameters in pagination PBENCH-1133 The `GET /datasets` response is optimized for sequential pagination, providing a convenient "next_url" string that can be used directly. However if a client wants to support "random access" pagination, this requires that the client parses the URL string in order to modify the `offset` parameter. This attempts to make that a bit easier by supplementing the current response payload with a `parameters` field containing the query parameters JSON object, making it easy to update the `offset` parameter. (Making the unit tests work against the normalized parameter list proved a bit challenging and I ended up saving the original "raw" client parameters in the API `context` so they can be used directly.)
1 parent 245bb0d commit 0a5a032

File tree

3 files changed

+82
-27
lines changed

3 files changed

+82
-27
lines changed

lib/pbench/server/api/resources/__init__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,11 +1700,8 @@ def _dispatch(
17001700
try:
17011701
if schema.query_schema:
17021702
query_params = self._gather_query_params(request, schema.query_schema)
1703-
1704-
params = self.schemas.validate(
1705-
method,
1706-
ApiParams(body=body_params, query=query_params, uri=uri_params),
1707-
)
1703+
raw_params = ApiParams(body=body_params, query=query_params, uri=uri_params)
1704+
params = self.schemas.validate(method, raw_params)
17081705
except APIInternalError as e:
17091706
current_app.logger.exception("{} {}", api_name, e.details)
17101707
abort(e.http_status, message=str(e))
@@ -1772,7 +1769,11 @@ def _dispatch(
17721769
"attributes": None,
17731770
}
17741771

1775-
context = {"auditing": auditing, "attributes": schema.attributes}
1772+
context = {
1773+
"auditing": auditing,
1774+
"attributes": schema.attributes,
1775+
"raw_params": raw_params,
1776+
}
17761777
try:
17771778
response = execute(params, request, context)
17781779
except APIInternalError as e:

lib/pbench/server/api/resources/datasets_list.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def __init__(self, config: PbenchServerConfig):
296296
)
297297

298298
def get_paginated_obj(
299-
self, query: Query, json: JSON, url: str
299+
self, query: Query, json: JSON, raw_params: ApiParams, url: str
300300
) -> tuple[list[JSONOBJECT], dict[str, str]]:
301301
"""Helper function to return a slice of datasets (constructed according
302302
to the user specified limit and an offset number) and a paginated object
@@ -309,10 +309,15 @@ def get_paginated_obj(
309309
"limit": 10 -> dataset[0: 10]
310310
"offset": 20 -> dataset[20: total_items_count]
311311
312-
TODO: We may need to optimize the pagination
313-
e.g Use of unique pointers to record the last returned row and then
314-
use this pointer in subsequent page request instead of an initial
315-
start to narrow down the result.
312+
Args:
313+
query: A SQLAlchemy query object
314+
json: The query parameters in normalized JSON form
315+
raw_params: The original API parameters for reference
316+
url: The API URL
317+
318+
Returns:
319+
The list of Dataset objects matched by the query and a pagination
320+
framework object.
316321
"""
317322
paginated_result = {}
318323
query = query.distinct()
@@ -333,14 +338,19 @@ def get_paginated_obj(
333338
Database.dump_query(query, current_app.logger)
334339

335340
items = query.all()
341+
raw = raw_params.query.copy()
336342
next_offset = offset + len(items)
337343
if next_offset < total_count:
338-
json["offset"] = next_offset
344+
json["offset"] = str(next_offset)
345+
raw["offset"] = str(next_offset)
339346
parsed_url = urlparse(url)
340347
next_url = parsed_url._replace(query=urlencode_json(json)).geturl()
341348
else:
349+
if limit:
350+
raw["offset"] = str(total_count)
342351
next_url = ""
343352

353+
paginated_result["parameters"] = raw
344354
paginated_result["next_url"] = next_url
345355
paginated_result["total"] = total_count
346356
return items, paginated_result
@@ -600,7 +610,12 @@ def daterange(self, query: Query) -> JSONOBJECT:
600610
return {}
601611

602612
def datasets(
603-
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
613+
self,
614+
request: Request,
615+
aliases: dict[str, Any],
616+
json: JSONOBJECT,
617+
raw_params: ApiParams,
618+
query: Query,
604619
) -> JSONOBJECT:
605620
"""Gather and paginate the selected datasets
606621
@@ -611,6 +626,7 @@ def datasets(
611626
request: The HTTP Request object
612627
aliases: Map of join column aliases for each Metadata namespace
613628
json: The JSON query parameters
629+
raw_params: The original API parameters (used for pagination)
614630
query: The basic filtered SQLAlchemy query object
615631
616632
Returns:
@@ -666,7 +682,7 @@ def datasets(
666682

667683
try:
668684
datasets, paginated_result = self.get_paginated_obj(
669-
query=query, json=json, url=request.url
685+
query=query, json=json, raw_params=raw_params, url=request.url
670686
)
671687
except (AttributeError, ProgrammingError, StatementError) as e:
672688
raise APIInternalError(
@@ -812,5 +828,5 @@ def _get(
812828
result.update(self.daterange(query))
813829
done = True
814830
if not done:
815-
result = self.datasets(request, aliases, json, query)
831+
result = self.datasets(request, aliases, json, context["raw_params"], query)
816832
return jsonify(result)

lib/pbench/test/unit/server/test_datasets_list.py

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
from http import HTTPStatus
33
import re
4-
from typing import Optional
4+
from typing import Any, Optional
55

66
import pytest
77
import requests
@@ -10,7 +10,7 @@
1010
from sqlalchemy.orm import aliased, Query
1111

1212
from pbench.server import JSON, JSONARRAY, JSONOBJECT
13-
from pbench.server.api.resources import APIAbort
13+
from pbench.server.api.resources import APIAbort, ApiParams
1414
from pbench.server.api.resources.datasets_list import DatasetsList, urlencode_json
1515
from pbench.server.database.database import Database
1616
from pbench.server.database.models.datasets import Dataset, Metadata
@@ -129,17 +129,27 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
129129
Returns:
130130
Paginated JSON object containing list of dataset values
131131
"""
132+
133+
def convert(k: str, v: Any) -> Any:
134+
if isinstance(v, str) and k in ("filter", "sort", "metadata"):
135+
return [v]
136+
elif isinstance(v, int):
137+
return str(v)
138+
else:
139+
return v
140+
132141
results: list[JSON] = []
133-
offset = query.get("offset", 0)
142+
offset = int(query.get("offset", "0"))
134143
limit = query.get("limit")
135144

136145
if limit:
137-
next_offset = offset + limit
146+
next_offset = offset + int(limit)
138147
paginated_name_list = name_list[offset:next_offset]
139148
if next_offset >= len(name_list):
149+
query["offset"] = str(len(name_list))
140150
next_url = ""
141151
else:
142-
query["offset"] = next_offset
152+
query["offset"] = str(next_offset)
143153
next_url = (
144154
f"http://localhost{server_config.rest_uri}/datasets?"
145155
+ urlencode_json(query)
@@ -161,7 +171,15 @@ def get_results(self, name_list: list[str], query: JSON, server_config) -> JSON:
161171
},
162172
}
163173
)
164-
return {"next_url": next_url, "results": results, "total": len(name_list)}
174+
q1 = {k: convert(k, v) for k, v in query.items()}
175+
if "metadata" not in q1:
176+
q1["metadata"] = ["dataset.uploaded"]
177+
return {
178+
"parameters": q1,
179+
"next_url": next_url,
180+
"results": results,
181+
"total": len(name_list),
182+
}
165183

166184
def compare_results(
167185
self, result: JSONOBJECT, name_list: list[str], query: JSON, server_config
@@ -190,8 +208,8 @@ def compare_results(
190208
(None, {}, ["fio_1", "fio_2"]),
191209
(None, {"access": "public"}, ["fio_1", "fio_2"]),
192210
("drb", {"name": "fio"}, ["fio_1", "fio_2"]),
193-
("drb", {"name": "fio", "limit": 1}, ["fio_1", "fio_2"]),
194-
("drb", {"name": "fio", "limit": 1, "offset": 2}, ["fio_1", "fio_2"]),
211+
("drb", {"name": "fio", "limit": "1"}, ["fio_1", "fio_2"]),
212+
("drb", {"name": "fio", "limit": 1, "offset": "2"}, ["fio_1", "fio_2"]),
195213
("drb", {"name": "fio", "offset": 1}, ["fio_1", "fio_2"]),
196214
("drb", {"name": "fio", "offset": 2}, ["fio_1", "fio_2"]),
197215
("drb", {"owner": "drb"}, ["drb", "fio_1"]),
@@ -256,6 +274,17 @@ def compare_results(
256274
def test_dataset_list(self, server_config, query_as, login, query, results):
257275
"""Test `datasets/list` filters
258276
277+
NOTE: Several of these queries use the "limit" and/or "offset" options
278+
to test how the result set is segmented during pagination. These are
279+
represented in the parametrization above interchangeably as integers or
280+
strings. This is because (1) the actual input to the Pbench Server API
281+
is always in string form as a URI query parameter but (2) the requests
282+
package understands this and stringifies integer parameters while (3)
283+
the Pbench Server API framework recognizes these are integer values and
284+
presents them to the API code as integers. Mixing integer and string
285+
representation here must have no impact on the operation of the API so
286+
it's worth testing.
287+
259288
Args:
260289
server_config: The PbenchServerConfig object
261290
query_as: A fixture to provide a helper that executes the API call
@@ -311,7 +340,9 @@ def test_mine_novalue(self, server_config, client, more_datasets, get_token_func
311340
headers=headers,
312341
)
313342
assert response.status_code == HTTPStatus.OK
314-
self.compare_results(response.json, ["drb", "fio_1"], {}, server_config)
343+
self.compare_results(
344+
response.json, ["drb", "fio_1"], {"mine": ""}, server_config
345+
)
315346

316347
@pytest.mark.parametrize(
317348
"login,query,results",
@@ -336,7 +367,7 @@ def test_dataset_paged_list(self, query_as, login, query, results, server_config
336367
results: A list of the dataset names we expect to be returned
337368
server_config: The PbenchServerConfig object
338369
"""
339-
query.update({"metadata": ["dataset.uploaded"], "limit": 5})
370+
query.update({"metadata": ["dataset.uploaded"], "limit": "5"})
340371
result = query_as(query, login, HTTPStatus.OK)
341372
self.compare_results(result.json, results, query, server_config)
342373

@@ -384,6 +415,7 @@ def test_get_key_errors(self, query_as):
384415
)
385416
assert response.json == {
386417
"next_url": "",
418+
"parameters": {"metadata": ["global.test.foo"]},
387419
"results": [
388420
{
389421
"metadata": {"global.test.foo": None},
@@ -444,6 +476,12 @@ def test_use_funk_metalog_keys(self, query_as):
444476
)
445477
assert response.json == {
446478
"next_url": "",
479+
"parameters": {
480+
"filter": [
481+
"dataset.metalog.iterations/[email protected]_name:~10"
482+
],
483+
"metadata": ["dataset.metalog.iterations/fooBar=10-what_else@weird"],
484+
},
447485
"results": [
448486
{
449487
"metadata": {
@@ -725,7 +763,7 @@ def test_mismatched_json_cast(self, query_as, server_config, query, results):
725763
"drb",
726764
HTTPStatus.OK,
727765
)
728-
self.compare_results(response.json, results, {}, server_config)
766+
self.compare_results(response.json, results, {"filter": query}, server_config)
729767

730768
@pytest.mark.parametrize(
731769
"query,message",
@@ -769,7 +807,7 @@ def test_pagination_error(self, caplog, monkeypatch, query_as, exception, error)
769807
"""
770808

771809
def do_error(
772-
self, query: Query, json: JSONOBJECT, url: str
810+
self, query: Query, json: JSONOBJECT, raw_params: ApiParams, url: str
773811
) -> tuple[JSONARRAY, JSONOBJECT]:
774812
raise exception
775813

0 commit comments

Comments (0)