Skip to content

Commit 1eebe0f

Browse files
siddardh-ra and siddardh
authored
Compare datasets - Integrate Quisby into Pbench Server API (#3470)
PBENCH-1189 --------- Co-authored-by: siddardh <sira@redhat27!>
1 parent 999f797 commit 1eebe0f

File tree

5 files changed

+300
-0
lines changed

5 files changed

+300
-0
lines changed

lib/pbench/client/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class API(Enum):
3939
"""
4040

4141
DATASETS = "datasets"
42+
DATASETS_COMPARE = "datasets_compare"
4243
DATASETS_CONTENTS = "datasets_contents"
4344
DATASETS_DETAIL = "datasets_detail"
4445
DATASETS_INVENTORY = "datasets_inventory"

lib/pbench/server/api/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pbench.common.logger import get_pbench_logger
1515
from pbench.server import PbenchServerConfig
1616
from pbench.server.api.resources.api_key import APIKeyManage
17+
from pbench.server.api.resources.datasets_compare import DatasetsCompare
1718
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
1819
from pbench.server.api.resources.datasets_list import DatasetsList
1920
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
@@ -63,6 +64,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
6364
endpoint="datasets",
6465
resource_class_args=(config,),
6566
)
67+
api.add_resource(
68+
DatasetsCompare,
69+
f"{base_uri}/compare",
70+
endpoint="datasets_compare",
71+
resource_class_args=(config,),
72+
)
6673
api.add_resource(
6774
DatasetsContents,
6875
f"{base_uri}/datasets/<string:dataset>/contents/",
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from http import HTTPStatus
2+
from urllib.request import Request
3+
4+
from flask import current_app, jsonify
5+
from flask.wrappers import Response
6+
from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing
7+
8+
from pbench.server import OperationCode, PbenchServerConfig
9+
from pbench.server.api.resources import (
10+
APIAbort,
11+
ApiAuthorization,
12+
ApiAuthorizationType,
13+
ApiBase,
14+
ApiContext,
15+
APIInternalError,
16+
ApiMethod,
17+
ApiParams,
18+
ApiSchema,
19+
Parameter,
20+
ParamType,
21+
Schema,
22+
)
23+
from pbench.server.cache_manager import (
24+
CacheManager,
25+
TarballNotFound,
26+
TarballUnpackError,
27+
)
28+
from pbench.server.database.models.datasets import Metadata
29+
30+
31+
class DatasetsCompare(ApiBase):
    """
    This class implements the Server API used to retrieve comparison data
    for visualization.

    GET /api/v1/compare?datasets=d1,d2,d3
    """

    def __init__(self, config: PbenchServerConfig):
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                query_schema=Schema(
                    Parameter(
                        "datasets",
                        ParamType.LIST,
                        element_type=ParamType.DATASET,
                        string_list=",",
                        required=True,
                    ),
                ),
                # Authorization is deliberately deferred: each dataset in the
                # list is individually access-checked in _get below.
                authorization=ApiAuthorizationType.NONE,
            ),
        )

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """
        Use Quisby to compare the selected datasets' results into a form
        that supports visualization.

        Args:
            params: includes the uri parameters, which provide the list of datasets.
            request: Original incoming Request object
            context: API context dictionary

        Raises:
            UnauthorizedAccess: The user isn't authorized for the requested access.
            APIAbort: reporting BAD_REQUEST (mismatched benchmarks) or
                UNSUPPORTED_MEDIA_TYPE (benchmark not supported by Quisby)
            APIInternalError: reporting a missing tarball, an unpack failure,
                or a Quisby processing failure

        Returns:
            A JSON response containing the Quisby comparison payload.
        """
        datasets = params.query.get("datasets")
        benchmark_choice = None
        for dataset in datasets:
            benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
            # Validate that all the selected datasets are of the same benchmark
            if not benchmark_choice:
                benchmark_choice = benchmark
            elif benchmark != benchmark_choice:
                raise APIAbort(
                    HTTPStatus.BAD_REQUEST,
                    f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.",
                )

            # BUG FIX: this check was previously performed once, after the
            # loop, using the loop-leaked `dataset` variable — so only the
            # final dataset in the list was actually authorized. Validate
            # that the user is authorized to READ every selected dataset.
            self._check_authorization(
                ApiAuthorization(
                    ApiAuthorizationType.USER_ACCESS,
                    OperationCode.READ,
                    dataset.owner_id,
                    dataset.access,
                )
            )

        cache_m = CacheManager(self.config, current_app.logger)
        stream_file = {}
        for dataset in datasets:
            try:
                tarball = cache_m.find_dataset(dataset.resource_id)
            except TarballNotFound as e:
                # A dataset known to the database must have a tarball; its
                # absence from the cache manager is a server-side error.
                raise APIInternalError(
                    f"Expected dataset with ID '{dataset.resource_id}' is missing from the cache manager."
                ) from e
            try:
                file = tarball.extract(
                    tarball.tarball_path, f"{tarball.name}/result.csv"
                )
            except TarballUnpackError as e:
                raise APIInternalError(str(e)) from e
            stream_file[dataset.name] = file

        # Use the validated common benchmark name rather than the loop-leaked
        # `benchmark` variable; they're equal, but this is explicit.
        benchmark_type = BenchmarkName.__members__.get(benchmark_choice.upper())
        if not benchmark_type:
            raise APIAbort(
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
                f"Unsupported Benchmark: {benchmark_choice}",
            )
        get_quisby_data = QuisbyProcessing().compare_csv_to_json(
            benchmark_type, InputType.STREAM, stream_file
        )
        if get_quisby_data["status"] != "success":
            raise APIInternalError(
                f"Quisby processing failure. Exception: {get_quisby_data['exception']}"
            )
        return jsonify(get_quisby_data)
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
from http import HTTPStatus
2+
from pathlib import Path
3+
from typing import Optional
4+
5+
from pquisby.lib.post_processing import QuisbyProcessing
6+
import pytest
7+
import requests
8+
9+
from pbench.server import JSON
10+
from pbench.server.cache_manager import CacheManager, TarballUnpackError
11+
from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata
12+
from pbench.server.database.models.users import User
13+
14+
15+
def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> str:
    """Stand-in for Metadata.getvalue reporting a fixed benchmark per dataset.

    The uperf_3/uperf_4 datasets deliberately report a different benchmark
    name so tests can exercise the mismatched-benchmark and
    unsupported-benchmark error paths.
    """
    return "hammerDB" if dataset.name in ("uperf_3", "uperf_4") else "uperf"
19+
20+
21+
class TestCompareDatasets:
    @pytest.fixture()
    def query_get_as(self, client, server_config, more_datasets, get_token_func):
        """
        Helper fixture to perform the API query and validate an expected
        return status.

        Args:
            client: Flask test API client fixture
            server_config: Pbench config fixture
            more_datasets: Dataset construction fixture
            get_token_func: Pbench token fixture
        """

        def query_api(
            datasets: list, user: str, expected_status: HTTPStatus
        ) -> requests.Response:
            # Translate dataset names to resource IDs; pass unknown names
            # through unchanged to allow deliberately bad values.
            ids = []
            for name in datasets:
                try:
                    ids.append(Dataset.query(name=name).resource_id)
                except DatasetNotFound:
                    ids.append(name)
            headers = (
                {"authorization": f"bearer {get_token_func(user)}"} if user else None
            )
            response = client.get(
                f"{server_config.rest_uri}/compare",
                query_string={"datasets": ids},
                headers=headers,
            )
            assert response.status_code == expected_status
            return response

        return query_api

    class MockTarball:
        # Minimal stand-in for a cache-manager Tarball object.
        tarball_path = Path("/dataset/tarball.tar.xz")
        name = "tarball"

        @staticmethod
        def extract(_tarball_path: Path, _path: str) -> str:
            return "CSV_file_as_a_string"

    def mock_find_dataset(self, dataset) -> MockTarball:
        # Validate the resource_id before handing back the mock tarball
        Dataset.query(resource_id=dataset)
        return self.MockTarball()

    def test_dataset_not_present(self, query_get_as, monkeypatch):
        """A dataset with no tarball in the cache manager is a server error."""
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        query_get_as(["fio_2"], "drb", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
        """A Quisby processing failure surfaces as an internal server error."""

        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str) -> str:
            return "IncorrectData"

        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "failed", "exception": "Unsupported Media Type"}

        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_tarball_unpack_exception(self, query_get_as, monkeypatch):
        """A tarball unpack failure surfaces as an internal server error."""

        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str):
            raise TarballUnpackError(
                _tarball_path, f"Testing unpack exception for path {_path}"
            )

        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    @pytest.mark.parametrize(
        "user,datasets,exp_status,exp_message",
        (
            (
                "drb",
                ["uperf_1", "nonexistent-dataset"],
                HTTPStatus.BAD_REQUEST,
                "Unrecognized list value ['nonexistent-dataset'] given for parameter datasets; expected Dataset",
            ),
            (
                "drb",
                ["uperf_1", "uperf_2"],
                HTTPStatus.FORBIDDEN,
                "User drb is not authorized to READ a resource owned by test with private access",
            ),
            (
                "test",
                ["uperf_1", "uperf_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                None,
                ["fio_1", "fio_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                "test",
                ["fio_1", "uperf_3"],
                HTTPStatus.BAD_REQUEST,
                "Selected dataset benchmarks must match: uperf and hammerDB cannot be compared.",
            ),
            (
                "test",
                ["uperf_3", "uperf_4"],
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
                "Unsupported Benchmark: hammerDB",
            ),
        ),
    )
    def test_datasets_with_different_benchmark(
        self, user, datasets, exp_status, exp_message, query_get_as, monkeypatch
    ):
        """Drive the compare API across auth, validation, and success cases."""

        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "success", "json_data": "quisby_data"}

        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )

        response = query_get_as(datasets, user, exp_status)
        if exp_status == HTTPStatus.OK:
            assert response.json["status"] == "success"
            assert response.json["json_data"] == "quisby_data"
        else:
            assert response.json["message"] == exp_message

lib/pbench/test/unit/server/test_endpoint_configure.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def check_config(self, client, server_config, host, my_headers={}):
5959
"template": f"{uri}/datasets/{{dataset}}",
6060
"params": {"dataset": {"type": "string"}},
6161
},
62+
"datasets_compare": {"template": f"{uri}/compare", "params": {}},
6263
"datasets_contents": {
6364
"template": f"{uri}/datasets/{{dataset}}/contents/{{target}}",
6465
"params": {

0 commit comments

Comments
 (0)