Merged
jenkins/run-server-func-tests (4 changes: 3 additions & 1 deletion)

@@ -142,7 +142,9 @@ else
 fi
 
 if [[ ${exit_status} -ne 0 ]]; then
-    dump_journal
+    if (( ${cleanup_flag} )); then
+        dump_journal
+    fi
     printf -- "\nFunctional tests exited with code %s\n" ${exit_status} >&2
 fi
lib/pbench/client/__init__.py (12 changes: 9 additions & 3 deletions)

@@ -494,14 +494,19 @@ def set_metadata(self, dataset_id: str, metadata: JSONOBJECT) -> JSONOBJECT:
         ).json()
 
     def update(
-        self, dataset_id: str, access: Optional[str] = None, owner: Optional[str] = None
-    ) -> JSONOBJECT:
+        self,
+        dataset_id: str,
+        access: Optional[str] = None,
+        owner: Optional[str] = None,
+        **kwargs,
+    ) -> requests.Response:
         """Update the dataset access or owner
 
         Args:
             dataset_id: the resource ID of the targeted dataset
             access: set the access mode of the dataset (private, public)
             owner: set the owning username of the dataset (requires ADMIN)
+            kwargs: additional client options
 
         Returns:
             A JSON document containing the response
@@ -515,7 +520,8 @@ def update(
             api=API.DATASETS,
             uri_params={"dataset": dataset_id},
             params=params,
-        ).json()
+            **kwargs,
+        )
 
     def get_settings(self, key: str = "") -> JSONOBJECT:
         """Return requested server setting.
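For orientation, a minimal sketch (not part of this diff) of how a caller might use the reworked method; the host, credentials, and dataset ID below are illustrative assumptions, not values from this PR:

    from pbench.client import PbenchServerClient

    # Hypothetical connection details; client setup is assumed.
    client = PbenchServerClient("pbench.example.com")
    client.login("tester", "tester-password")

    # update() now returns the raw requests.Response rather than decoded JSON,
    # and forwards extra keyword arguments (such as raise_error, used by the
    # functional tests below) to the underlying request.
    response = client.update("a1b2c3d4e5", access="public", raise_error=False)
    if response.ok:
        print("dataset is now public")
    else:
        print(f"update failed: {response.json()['message']}")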
lib/pbench/server/api/resources/datasets_compare.py (4 changes: 2 additions & 2 deletions)

@@ -70,7 +70,7 @@ def _get(
         datasets = params.query.get("datasets")
         benchmark_choice = None
         for dataset in datasets:
-            benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
+            benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
             # Validate if all the selected datasets is of same benchmark
             if not benchmark_choice:
                 benchmark_choice = benchmark
@@ -116,5 +116,5 @@ def _get(
             raise APIInternalError(
                 f"Quisby processing failure. Exception: {quisby_response['exception']}"
             )
-        quisby_response["benchmark"] = benchmark.lower()
+        quisby_response["benchmark"] = benchmark
         return jsonify(quisby_response)
lib/pbench/server/api/resources/datasets_visualize.py (7 changes: 3 additions & 4 deletions)

@@ -60,9 +60,8 @@ def _get(
 
         dataset = params.uri["dataset"]
 
-        metadata = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
-        benchmark = metadata.upper()
-        benchmark_type = BenchmarkName.__members__.get(benchmark)
+        benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
+        benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
         if not benchmark_type:
             raise APIAbort(
                 HTTPStatus.BAD_REQUEST, f"Unsupported Benchmark: {benchmark}"
@@ -84,5 +83,5 @@ def _get(
             raise APIInternalError(
                 f"Quisby processing failure. Exception: {quisby_response['exception']}"
             )
-        quisby_response["benchmark"] = benchmark.lower()
+        quisby_response["benchmark"] = benchmark
         return jsonify(quisby_response)
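The reordered lookup keeps the benchmark's stored casing for error messages while still matching enum members case-insensitively. A small sketch, assuming pquisby's BenchmarkName enum and a hypothetical stored value:

    from pquisby.lib.post_processing import BenchmarkName

    benchmark = "hammerDB"  # hypothetical server.benchmark value
    # Uppercase only for the member lookup; the original value survives for
    # the error message (compare the unit-test change at the bottom of this PR).
    benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
    if not benchmark_type:
        print(f"Unsupported Benchmark: {benchmark}")  # "hammerDB", not "HAMMERDB"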
lib/pbench/server/api/resources/endpoint_configure.py (4 changes: 4 additions & 0 deletions)

@@ -2,6 +2,7 @@
 from urllib.parse import urljoin
 
 from flask import current_app, jsonify, Request, Response
+from pquisby.lib.post_processing import BenchmarkName
 
 from pbench.server import OperationCode, PbenchServerConfig
 from pbench.server.api.resources import (
@@ -122,6 +123,9 @@ def _get(self, args: ApiParams, request: Request, context: ApiContext) -> Response:
         endpoints = {
             "identification": f"Pbench server {self.server_config.COMMIT_ID}",
             "uri": templates,
+            "visualization": {
+                "benchmarks": [m.lower() for m in BenchmarkName.__members__.keys()]
+            },
         }
 
         client = self.server_config.get("openid", "client")
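A quick sketch of what the new field evaluates to, assuming UPERF is currently the only BenchmarkName member (which is what the functional test below asserts):

    from pquisby.lib.post_processing import BenchmarkName

    # Lowercase each enum member name, mirroring the construction above.
    benchmarks = [m.lower() for m in BenchmarkName.__members__.keys()]
    print(benchmarks)  # ["uperf"] when UPERF is the only member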
lib/pbench/server/api/resources/intake_base.py (39 changes: 25 additions & 14 deletions)

@@ -419,15 +419,22 @@ def _intake(
             # Add the processed tarball metadata.log file contents, if any.
             #
             # If we were unable to find or parse the tarball's metadata.log
-            # file, construct a minimal metadata context identifying the
-            # dataset as a "foreign" benchmark script, and disable indexing,
-            # which requires the metadata.
+            # file we construct a minimal metadata context identifying the
+            # dataset as an unknown benchmark script, and we disable indexing,
+            # which requires result metadata.
             try:
                 metalog = tarball.metadata
                 if not metalog:
-                    metalog = {"pbench": {"script": "Foreign"}}
+                    benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
+                    metalog = {"pbench": {"name": dataset.name, "script": benchmark}}
                     metadata[Metadata.SERVER_ARCHIVE] = True
                     attributes["missing_metadata"] = True
+                else:
+                    p = metalog.get("pbench")
+                    if p:
+                        benchmark = p.get("script", Metadata.SERVER_BENCHMARK_UNKNOWN)
+                    else:
+                        benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
                 Metadata.create(dataset=dataset, key=Metadata.METALOG, value=metalog)
             except Exception as exc:
                 raise APIInternalError(
Comment on lines 427 to +437

Member:
Alternatively,

                benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
                if not metalog:
                    metalog = {"pbench": {"name": dataset.name, "script": benchmark}}
                    metadata[Metadata.SERVER_ARCHIVE] = True
                    attributes["missing_metadata"] = True
                else:
                    p = metalog.get("pbench", {"script": benchmark})
                    benchmark = p.get("script", benchmark)
@@ -447,17 +454,21 @@ def _intake(
             try:
                 retention = datetime.timedelta(days=retention_days)
                 deletion = dataset.uploaded + retention
-                Metadata.setvalue(
-                    dataset=dataset,
-                    key=Metadata.TARBALL_PATH,
-                    value=str(tarball.tarball_path),
-                )
-                Metadata.setvalue(
-                    dataset=dataset,
-                    key=Metadata.SERVER_DELETION,
-                    value=UtcTimeHelper(deletion).to_iso_string(),
+
+                # Make a shallow copy so we can add keys without affecting the
+                # original (which will be recorded in the audit log)
+                meta = metadata.copy()
+                meta.update(
+                    {
+                        Metadata.TARBALL_PATH: str(tarball.tarball_path),
+                        Metadata.SERVER_DELETION: UtcTimeHelper(
+                            deletion
+                        ).to_iso_string(),
+                        Metadata.SERVER_BENCHMARK: benchmark,
+                    }
                 )
-                f = self._set_dataset_metadata(dataset, metadata)
+                current_app.logger.info("Metadata for {}: {}", dataset.name, meta)
+                f = self._set_dataset_metadata(dataset, meta)
                 if f:
                     attributes["failures"] = f
             except Exception as e:
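The benchmark-derivation fallback above can be sketched in isolation; plain dicts stand in for the parsed metadata.log and the Metadata constants:

    from typing import Optional

    UNKNOWN = "unknown"  # stands in for Metadata.SERVER_BENCHMARK_UNKNOWN

    def derive_benchmark(metalog: Optional[dict]) -> str:
        # A missing metadata.log, a missing "pbench" section, and a missing
        # "script" option all resolve to the "unknown" benchmark.
        if not metalog:
            return UNKNOWN
        p = metalog.get("pbench")
        return p.get("script", UNKNOWN) if p else UNKNOWN

    assert derive_benchmark(None) == UNKNOWN
    assert derive_benchmark({"pbench": {"script": "fio"}}) == "fio"
    assert derive_benchmark({"pbench": {"name": "a-dataset"}}) == UNKNOWN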
lib/pbench/server/api/resources/query_apis/datasets/__init__.py (10 changes: 7 additions & 3 deletions)

@@ -1,9 +1,11 @@
+from http import HTTPStatus
 from typing import AnyStr, List, NoReturn, Union
 
 from flask import current_app
 
 from pbench.server import JSON, PbenchServerConfig
 from pbench.server.api.resources import (
+    APIAbort,
     ApiAuthorizationType,
     ApiContext,
     APIInternalError,
@@ -119,11 +121,13 @@ def get_index(self, dataset: Dataset, root_index_name: AnyStr) -> AnyStr:
         try:
             index_map = Metadata.getvalue(dataset=dataset, key=Metadata.INDEX_MAP)
         except MetadataError as exc:
-            current_app.logger.error("{}", exc)
-            raise APIInternalError(f"Required metadata {Metadata.INDEX_MAP} missing")
+            if Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE):
+                raise APIAbort(HTTPStatus.CONFLICT, "Dataset is not indexed")
+            raise APIInternalError(
+                f"Required metadata {Metadata.INDEX_MAP} missing"
+            ) from exc
 
         if index_map is None:
-            current_app.logger.error("Index map metadata has no value")
             raise APIInternalError(
                 f"Required metadata {Metadata.INDEX_MAP} has no value"
             )
Comment on lines +124 to +125

Member:
This message is insufficient: in 90% of the cases when users receive this, they are going to ask, "why isn't this dataset indexed?" And, it's unlikely that they are going to realize that it is because indexing was disabled on it when it was uploaded.

So, this message really should say something more like, "Indexing of this dataset was disabled by the owner".

Member Author:
It's not necessarily by deliberate action of the owner: if a tarball is uploaded without a metadata.log in the right place, we'll accept it but mark it "archive only". I suppose I could have said that it's marked for archive only, but at the time that seemed more complicated and more obscure than just saying it wasn't indexed. Maybe "indexing was disabled" would be OK, but "by the owner" implies deliberate intent.

Member:
> if a tarball is uploaded without a metadata.log in the right place, we'll accept it but mark it "archive only".

Ah, yes, I'd neglected that detail. But, yes, "marked for archive only" would be much better than "not indexed".

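The client-visible effect: a query against an archive-only dataset now yields a 409 CONFLICT rather than an internal error. Sketched below with plain requests and a hypothetical dataset URI (the real route comes from the server's endpoints document):

    import requests

    # Hypothetical route; substitute a real host and resource ID.
    url = "https://pbench.example.com/api/v1/datasets/<resource_id>/detail"
    response = requests.get(url)
    if response.status_code == 409:
        print(response.json()["message"])  # "Dataset is not indexed"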
lib/pbench/server/database/models/datasets.py (7 changes: 7 additions & 0 deletions)

@@ -612,6 +612,13 @@ class Metadata(Database.Base):
     METALOG = "metalog"
     NATIVE_KEYS = [GLOBAL, METALOG, SERVER, USER]
 
+    # BENCHMARK provides a simple identification of the native benchmark where
+    # one can be identified. At the simplest for Pbench Agent tarballs this is
+    # just "dataset.metalog.pbench.script" but will be expanded in the future
+    # for example to identify a "pbench-user-benchmark -- fio" as "fio".
+    SERVER_BENCHMARK = "server.benchmark"
+    SERVER_BENCHMARK_UNKNOWN = "unknown"
+
     # DELETION timestamp for dataset based on user settings and system
     # settings when the dataset is created.
     #
Comment on lines +615 to +620

Member:
It probably would be good to have a reference here to possible values for this key (i.e., to BenchmarkName or endpoints["visualization"]["benchmarks"]?).

Member Author:
Those would constitute examples, but I already added fio as an example. Maybe eventually we'll support visualization of all possible benchmarks, but right now either of those references would be incomplete.

Member:
So, the value associated with "server.benchmark" can be anything the user wants it to be...but, if it's not one of the BenchmarkName values, the Server won't be able to recognize it, and if it's not one of endpoints["visualization"]["benchmarks"] it can't be visualized by the Dashboard, right? I vote that we say that, here....


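A small sketch of how server code can consume the new key; the fallback covers datasets uploaded before this change, and getvalue is assumed to return None when the key was never set:

    from pbench.server.database.models.datasets import Dataset, Metadata

    def benchmark_of(dataset: Dataset) -> str:
        # Resolve a dataset's benchmark, defaulting to "unknown".
        value = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
        return value if value else Metadata.SERVER_BENCHMARK_UNKNOWN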
lib/pbench/test/functional/server/test_connect.py (1 change: 1 addition & 0 deletions)

@@ -14,6 +14,7 @@ def test_connect(self, server_client: PbenchServerClient):
         assert endpoints
         assert "identification" in endpoints
         assert "uri" in endpoints
+        assert endpoints["visualization"]["benchmarks"] == ["uperf"]
 
         # Verify that all expected endpoints are reported
         for a in endpoints["uri"].keys():
lib/pbench/test/functional/server/test_datasets.py (85 changes: 66 additions & 19 deletions)

@@ -79,7 +79,13 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user):
             print(f"\t... uploaded {name}: {a}")
 
         datasets = server_client.get_list(
-            metadata=["dataset.access", "server.tarball-path", "dataset.operations"]
+            metadata=[
+                "dataset.access",
+                "dataset.metalog.pbench.script",
+                "server.benchmark",
+                "server.tarball-path",
+                "dataset.operations",
+            ]
         )
         found = frozenset({d.name for d in datasets})
         expected = frozenset(tarballs.keys())
@@ -91,6 +97,10 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user):
                 t = tarballs[dataset.name]
                 assert dataset.name in dataset.metadata["server.tarball-path"]
                 assert dataset.metadata["dataset.operations"]["UPLOAD"]["state"] == "OK"
+                assert (
+                    dataset.metadata["dataset.metalog.pbench.script"]
+                    == dataset.metadata["server.benchmark"]
+                )
                 assert t.access == dataset.metadata["dataset.access"]
             except HTTPError as exc:
                 pytest.fail(
Comment on lines +100 to +103

Member:
While this assertion (generally) holds at the moment, this isn't really what we want going forward, right? That is, we're hoping that at some point we'll have pbench-user-benchmark as the script and fio as the benchmark, right? So, it would be better if we could structure this assertion in a way which doesn't break when we get where we want to be going. (I.e., "test as specification".)

(Also, you didn't take this shortcut at lines 190 and 192.)

Member Author:
For the archiveonly test, I know exactly what values to expect; this is an iteration, and all we can really know at this point is that they ought to match.

When and if we work out a more general "base benchmark" recognition that doesn't rely on dataset.metalog.pbench.script, we'll have to figure out how to mimic that on the functional test side.

Member:
> all we can really know at this point is that they ought to match.

They should only match currently because of an implementation detail, not because that's specifically the way we want these fields to behave. It would be best if the tests were constructed in a way that doesn't require them to change when details of the implementation change.... (Ideally, we would not change the tests nearly as often as we seem to do.)

@@ -169,9 +179,17 @@ def test_archive_only(server_client: PbenchServerClient, login_user):
         API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
     )
     metadata = server_client.get_metadata(
-        md5, ["dataset.operations", "server.archiveonly"]
+        md5,
+        [
+            "dataset.metalog.pbench.script",
+            "dataset.operations",
+            "server.archiveonly",
+            "server.benchmark",
+        ],
     )
+    assert metadata["dataset.metalog.pbench.script"] == "fio"
     assert metadata["server.archiveonly"] is True
+    assert metadata["server.benchmark"] == "fio"
 
     # NOTE: we could wait here; however, the UNPACK operation is only
     # enabled by upload, and INDEX is only enabled by UNPACK: so if they're
@@ -209,9 +227,18 @@ def test_no_metadata(server_client: PbenchServerClient, login_user):
         API.DATASETS_INVENTORY, {"dataset": md5, "target": ""}
     )
     metadata = server_client.get_metadata(
-        md5, ["dataset.operations", "dataset.metalog", "server.archiveonly"]
+        md5,
+        [
+            "dataset.operations",
+            "dataset.metalog",
+            "server.archiveonly",
+            "server.benchmark",
+        ],
     )
-    assert metadata["dataset.metalog"] == {"pbench": {"script": "Foreign"}}
+    assert metadata["dataset.metalog"] == {
+        "pbench": {"name": name, "script": "unknown"}
+    }
+    assert metadata["server.benchmark"] == "unknown"
    assert metadata["server.archiveonly"] is True
 
     # NOTE: we could wait here; however, the UNPACK operation is only
@@ -361,17 +388,21 @@ def test_details(self, server_client: PbenchServerClient, login_user):
         )
         for d in datasets:
             print(f"\t... checking run index for {d.name}")
-            indexed = not d.metadata["server.archiveonly"]
             response = server_client.get(
-                API.DATASETS_DETAIL, {"dataset": d.resource_id}
+                API.DATASETS_DETAIL, {"dataset": d.resource_id}, raise_error=False
             )
             detail = response.json()
+            indexed = not d.metadata["server.archiveonly"]
             if indexed:
+                assert (
+                    response.ok
+                ), f"DETAILS for {d.name} failed with {detail['message']}"
                 assert (
                     d.metadata["dataset.metalog.pbench"]["script"]
                     == detail["runMetadata"]["script"]
                 )
             else:
+                assert response.status_code == HTTPStatus.CONFLICT
                 print(f"\t\t... {d.name} is archiveonly")
 
 
@@ -554,8 +585,15 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
 
         for dataset in datasets:
             response = server_client.get(
-                API.DATASETS_CONTENTS, {"dataset": dataset.resource_id, "target": ""}
+                API.DATASETS_CONTENTS,
+                {"dataset": dataset.resource_id, "target": ""},
+                raise_error=False,
             )
+            archive = dataset.metadata["server.archiveonly"]
+            if archive:
+                assert response.status_code == HTTPStatus.CONFLICT
+                return
+
             assert (
                 response.ok
             ), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}"
@@ -565,15 +603,14 @@ def test_contents(self, server_client: PbenchServerClient, login_user):
             # directory is technically possible, but not legal unless it's a
             # trivial "archiveonly" dataset. NOTE: this will also fail if
             # either the "directories" or "files" JSON keys are missing.
-            archive = dataset.metadata["server.archiveonly"]
-            assert json["directories"] or json["files"] or archive
+            assert json["directories"] or json["files"]
 
             # Even if they're empty, both values must be lists
             assert isinstance(json["directories"], list)
             assert isinstance(json["files"], list)
 
-            # Unless archiveonly, we need at least a metadata.log
-            assert archive or "metadata.log" in (f["name"] for f in json["files"])
+            # We need at least a metadata.log
+            assert "metadata.log" in (f["name"] for f in json["files"])
 
             for d in json["directories"]:
                 uri = server_client._uri(
@@ -598,7 +635,7 @@ def test_visualize(self, server_client: PbenchServerClient, login_user):
         """
         datasets = server_client.get_list(
             owner="tester",
-            filter=["dataset.metalog.pbench.script:uperf"],
+            filter=["server.benchmark:uperf"],
         )
 
         for dataset in datasets:
@@ -624,7 +661,7 @@ def test_compare(self, server_client: PbenchServerClient, login_user):
         """
         datasets = server_client.get_list(
             owner="tester",
-            filter=["dataset.metalog.pbench.script:uperf"],
+            filter=["server.benchmark:uperf"],
         )
 
         candidates = [dataset.resource_id for dataset in datasets]
@@ -686,15 +723,25 @@ class TestUpdate:
     @pytest.mark.parametrize("access", ("public", "private"))
     def test_publish(self, server_client: PbenchServerClient, login_user, access):
         expected = "public" if access == "private" else "private"
-        datasets = server_client.get_list(access=access, mine="true")
+        datasets = server_client.get_list(
+            access=access, mine="true", metadata=["server.archiveonly"]
+        )
         print(f" ... updating {access} datasets to {expected} ...")
         for dataset in datasets:
-            server_client.update(dataset.resource_id, access=expected)
-            print(f"\t ... updating {dataset.name} to {access!r}")
-            meta = server_client.get_metadata(
-                dataset.resource_id, metadata="dataset.access"
+            response = server_client.update(
+                dataset.resource_id, access=expected, raise_error=False
             )
-            assert meta["dataset.access"] == expected
+            print(f"\t ... updating {dataset.name} to {access!r}")
+            if response.ok:
+                assert not dataset.metadata["server.archiveonly"]
+                meta = server_client.get_metadata(
+                    dataset.resource_id, metadata="dataset.access"
+                )
+                assert meta["dataset.access"] == expected
+            else:
+                assert dataset.metadata[
+                    "server.archiveonly"
+                ], f"Indexed dataset {dataset.name} failed to update with {response.json()['message']}"
 
 
 class TestDelete:
Comment on lines 730 to +735

Member:
Shouldn't the print() at line 734 precede the operation at line 731? (And, shouldn't the print() be referencing expected rather than access, since that's what line 732 does?)

Member Author:
This shows up as a diff because I rearranged code, but I didn't change the line.

Member:
True. But, I still think the lines are in the wrong order, etc. 🙂

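The filter expressions above use the client's metadata-filter syntax. A hedged sketch of the same query outside the test harness, assuming an authenticated PbenchServerClient instance:

    def uperf_resource_ids(server_client) -> list:
        # "server.benchmark:uperf" selects datasets whose server.benchmark
        # metadata value equals "uperf".
        datasets = server_client.get_list(
            owner="tester", filter=["server.benchmark:uperf"]
        )
        return [d.resource_id for d in datasets]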
lib/pbench/test/unit/server/test_datasets_visualize.py (2 changes: 1 addition & 1 deletion)

@@ -130,5 +130,5 @@ def mock_get_metadata(_d: Dataset, _k: str, _u: Optional[User] = None) -> JSON:
     monkeypatch.setattr(Metadata, "getvalue", mock_get_metadata)
     monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
     response = query_get_as("uperf_1", "test", HTTPStatus.BAD_REQUEST)
-    assert response.json["message"] == "Unsupported Benchmark: HAMMERDB"
+    assert response.json["message"] == "Unsupported Benchmark: hammerDB"
     assert extract_not_called
lib/pbench/test/unit/server/test_endpoint_configure.py (1 change: 1 addition & 0 deletions)

@@ -54,6 +54,7 @@ def check_config(self, client, server_config, host, my_headers={}):
         uri = urljoin(host, uri_prefix)
         expected_results = {
             "identification": f"Pbench server {server_config.COMMIT_ID}",
+            "visualization": {"benchmarks": ["uperf"]},
             "uri": {
                 "datasets": {
                     "template": f"{uri}/datasets/{{dataset}}",