
Commit eb8ba0a

Review comments and extended unit testing

1 parent e8f30d6 commit eb8ba0a

8 files changed: +164, -112 lines

lib/pbench/server/api/resources/datasets_compare.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -100,7 +100,7 @@ def _get(
         stream_file = {}
         for dataset in datasets:
             try:
-                info = cache_m.filestream(dataset.resource_id, "result.csv")
+                info = cache_m.get_inventory(dataset.resource_id, "result.csv")
                 file = info["stream"].read().decode("utf-8")
                 info["stream"].close()
             except Exception as e:
```
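The rename from `filestream` to `get_inventory` leaves the calling convention intact: the API returns a dictionary whose `"stream"` member is read, decoded, and closed. A minimal caller-side sketch of that pattern, assuming only the dictionary keys visible in this commit (`datasets_visualize.py` below follows the same sequence):

```python
from pbench.server.cache_manager import CacheManager


def read_result_csv(cache_m: CacheManager, resource_id: str) -> str:
    """Illustrative helper (not pbench code): fetch result.csv text via get_inventory."""
    info = cache_m.get_inventory(resource_id, "result.csv")
    try:
        # The returned dictionary carries "name", "type", and "stream" members.
        return info["stream"].read().decode("utf-8")
    finally:
        # Closing the Inventory stream also releases the TarFile it holds open.
        info["stream"].close()
```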

lib/pbench/server/api/resources/datasets_inventory.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -61,7 +61,7 @@ def _get(
 
         cache_m = CacheManager(self.config, current_app.logger)
         try:
-            file_info = cache_m.filestream(dataset.resource_id, target)
+            file_info = cache_m.get_inventory(dataset.resource_id, target)
         except TarballNotFound as e:
             raise APIAbort(HTTPStatus.NOT_FOUND, str(e))
 
@@ -89,6 +89,8 @@ def _get(
                 stream, as_attachment=target is None, download_name=file_info["name"]
             )
         except Exception as e:
+            if stream:
+                stream.close()
             raise APIInternalError(
                 f"Problem sending {dataset}:{target} stream {stream}: {str(e)!r}"
             )
```
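The two added lines close the stream when `send_file` raises, since the `Inventory` object keeps the source `TarFile` open until it is explicitly closed. A sketch of the close-on-error shape, with hypothetical names and simplified error handling (not the server's actual handler):

```python
from flask import Response, send_file


def send_inventory_stream(file_info: dict, target: str) -> Response:
    """Illustrative pattern: release the Inventory stream if send_file fails."""
    stream = file_info["stream"]
    try:
        return send_file(
            stream, as_attachment=target is None, download_name=file_info["name"]
        )
    except Exception:
        if stream:
            stream.close()  # avoid leaking the open tarball on failure
        raise
```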

lib/pbench/server/api/resources/datasets_visualize.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -70,7 +70,7 @@ def _get(
 
         cache_m = CacheManager(self.config, current_app.logger)
         try:
-            info = cache_m.filestream(dataset.resource_id, "result.csv")
+            info = cache_m.get_inventory(dataset.resource_id, "result.csv")
             file = info["stream"].read().decode("utf-8")
             info["stream"].close()
         except Exception as e:
```

lib/pbench/server/cache_manager.py

Lines changed: 12 additions & 19 deletions

```diff
@@ -1,7 +1,6 @@
 from collections import deque
 from dataclasses import dataclass
 from enum import auto, Enum
-from io import SEEK_SET
 from logging import Logger
 from pathlib import Path
 import shlex
@@ -47,7 +46,7 @@ class CacheExtractBadPath(CacheManagerError):
     """Request to extract a path that's bad or not a file"""
 
     def __init__(self, tar_name: Path, path: Union[str, Path]):
-        self.name = Dataset.stem(tar_name)
+        self.name = tar_name.name
         self.path = str(path)
 
     def __str__(self) -> str:
@@ -214,27 +213,26 @@ def close(self):
 
     def getbuffer(self):
         """Return the underlying byte buffer (used by send_file)"""
-        return self.getbuffer()
+        return self.stream.getbuffer()
 
-    def read(self, size: int = -1, /) -> bytes:
+    def read(self, *args, **kwargs) -> bytes:
         """Encapsulate a read operation"""
-        return self.stream.read(size)
+        return self.stream.read(*args, **kwargs)
 
     def readable(self) -> bool:
         """Return the readable state of the stream"""
         return self.stream.readable()
 
-    def seek(self, offset: int, whence: int = SEEK_SET, /) -> int:
+    def seek(self, *args, **kwargs) -> int:
         """Allow setting the relative position in the stream"""
-        return self.stream.seek(offset, whence)
+        return self.stream.seek(*args, **kwargs)
 
-    def __str__(self) -> str:
+    def __repr__(self) -> str:
         """Return a string representation"""
-        return f"<Stream {self.stream} from {self.tarfile}"
+        return f"<Stream {self.stream} from {self.tarfile}>"
 
     def __iter__(self):
         """Allow iterating through lines in the buffer"""
-        self.stream.seek(0)
         return self
 
     def __next__(self):
@@ -537,17 +535,12 @@ def extract(tarball_path: Path, path: Path) -> Inventory:
             raise CacheExtractBadPath(tarball_path, path)
         return Inventory(stream, tar)
 
-    def filestream(self, path: str) -> Optional[JSONOBJECT]:
+    def get_inventory(self, path: str) -> Optional[JSONOBJECT]:
         """Access the file stream of a tarball member file.
 
         Args:
             path: relative path within the tarball of a file
 
-        Returns:
-            An inventory object that mimics an IO[bytes] object while also
-            maintaining a reference to the tarfile TarFile object to be
-            closed later.
-
         Returns:
             Dictionary with file info and file stream
         """
@@ -574,7 +567,7 @@ def _get_metadata(tarball_path: Path) -> Optional[JSONOBJECT]:
         """
         name = Dataset.stem(tarball_path)
         try:
-            data: IO[bytes] = Tarball.extract(tarball_path, f"{name}/metadata.log")
+            data = Tarball.extract(tarball_path, f"{name}/metadata.log")
        except CacheExtractBadPath:
             return None
         else:
@@ -1119,7 +1112,7 @@ def get_info(self, dataset_id: str, path: Path) -> dict[str, Any]:
         tmap = tarball.get_info(path)
         return tmap
 
-    def filestream(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]:
+    def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]:
         """Return filestream data for a file within a dataset tarball
 
         {
@@ -1136,7 +1129,7 @@ def filestream(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]:
             File info including a byte stream for a regular file
         """
         tarball = self.find_dataset(dataset_id)
-        return tarball.filestream(target)
+        return tarball.get_inventory(target)
 
     def uncache(self, dataset_id: str):
         """Remove the unpacked tarball tree.
```

lib/pbench/test/unit/server/test_cache_manager.py

Lines changed: 99 additions & 16 deletions

```diff
@@ -13,6 +13,7 @@
 from pbench.server.cache_manager import (
     BadDirpath,
     BadFilename,
+    CacheExtractBadPath,
     CacheManager,
     CacheType,
     Controller,
@@ -903,35 +904,117 @@ def test_filestream(
                 tmp_path, "dir_name"
             )
             tb.cache_map(tar_dir)
-            file_info = tb.filestream(file_path)
+            file_info = tb.get_inventory(file_path)
             assert file_info["type"] == exp_file_type
             assert file_info["stream"].stream == exp_stream
 
-    def test_filestream_tarfile_open(self, monkeypatch, tmp_path):
-        """Test to check non-existent file or tarfile unpack issue"""
-        tar = Path("/mock/dir_name.tar.xz")
-        cache = Path("/mock/.cache")
+    def test_tarfile_open_fails(self, monkeypatch, tmp_path):
+        """Test to check non-existent tarfile"""
+        tar = Path("/mock/result.tar.xz")
 
         def fake_tarfile_open(self, path):
             raise tarfile.TarError("Invalid Tarfile")
 
         with monkeypatch.context() as m:
-            m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__)
-            m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__)
             m.setattr(tarfile, "open", fake_tarfile_open)
-            tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None))
-            tar_dir = TestCacheManager.MockController.generate_test_result_tree(
-                tmp_path, "dir_name"
-            )
-            tb.cache_map(tar_dir)
 
-            expected_error_msg = (
-                f"The dataset tarball named '{tb.tarball_path}' is not found"
-            )
+            expected_error_msg = f"The dataset tarball named '{tar}' is not found"
             with pytest.raises(TarballNotFound) as exc:
-                tb.filestream("subdir1/f11.txt")
+                Tarball.extract(tar, Path("subdir1/f11.txt"))
+            assert str(exc.value) == expected_error_msg
+
+    def test_tarfile_extractfile_fails(self, monkeypatch, tmp_path):
+        """Test to check non-existent path in tarfile"""
+        tar = Path("/mock/result.tar.xz")
+        path = Path("subdir/f11.txt")
+
+        class MockTarFile:
+            def extractfile(self, path):
+                raise Exception("Mr Robot refuses trivial human command")
+
+        def fake_tarfile_open(self, path):
+            return MockTarFile()
+
+        with monkeypatch.context() as m:
+            m.setattr(tarfile, "open", fake_tarfile_open)
+            expected_error_msg = f"Unable to extract {path} from {tar.name}"
+            with pytest.raises(CacheExtractBadPath) as exc:
+                Tarball.extract(tar, path)
             assert str(exc.value) == expected_error_msg
 
+    def test_tarfile_extractfile_notfile(self, monkeypatch, tmp_path):
+        """Test to check target that's not a file"""
+        tar = Path("/mock/result.tar.xz")
+        path = Path("subdir/f11.txt")
+
+        class MockTarFile:
+            def extractfile(self, path):
+                return None
+
+        def fake_tarfile_open(self, path):
+            return MockTarFile()
+
+        with monkeypatch.context() as m:
+            m.setattr(tarfile, "open", fake_tarfile_open)
+            expected_error_msg = f"Unable to extract {path} from {tar.name}"
+            with pytest.raises(CacheExtractBadPath) as exc:
+                Tarball.extract(tar, path)
+            assert str(exc.value) == expected_error_msg
+
+    @pytest.mark.parametrize(
+        "tarball,stream", (("hasmetalog.tar.xz", True), ("nometalog.tar.xz", False))
+    )
+    def test_get_metadata(self, monkeypatch, tarball, stream):
+        """Verify access and processing of `metadata.log`"""
+
+        @staticmethod
+        def fake_extract(t: Path, f: Path):
+            if str(t) == tarball:
+                if str(f) == f"{Dataset.stem(t)}/metadata.log":
+                    if stream:
+                        return io.BytesIO(b"[test]\nfoo = bar\n")
+                    else:
+                        raise CacheExtractBadPath(t, f)
+            raise Exception(f"Unexpected mock exception with stream:{stream}: {t}, {f}")
+
+        with monkeypatch.context() as m:
+            m.setattr(Tarball, "extract", fake_extract)
+            metadata = Tarball._get_metadata(Path(tarball))
+
+        if stream:
+            assert metadata == {"test": {"foo": "bar"}}
+        else:
+            assert metadata is None
+
+    def test_inventory(self):
+        closed = False
+
+        class MockTarFile:
+            def close(self):
+                nonlocal closed
+                closed = True
+
+            def __repr__(self) -> str:
+                return "<Mock tarfile>"
+
+        raw = b"abcde\nfghij\n"
+        stream = Inventory(io.BytesIO(raw), MockTarFile())
+        assert re.match(
+            r"<Stream <_io.BytesIO object at 0x[a-z0-9]+> from <Mock tarfile>",
+            str(stream),
+        )
+
+        assert stream.getbuffer() == raw
+        assert stream.readable()
+        assert stream.read(5) == b"abcde"
+        assert stream.read() == b"\nfghij\n"
+        assert stream.seek(0) == 0
+        assert [b for b in stream] == [b"abcde\n", b"fghij\n"]
+        stream.close()
+        assert closed
+        with pytest.raises(ValueError):
+            stream.read()
+
     def test_find(
         self, selinux_enabled, server_config, make_logger, tarball, monkeypatch
     ):
```
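The parametrized `test_get_metadata` expects `{"test": {"foo": "bar"}}` back from an INI-style `metadata.log`, which implies `_get_metadata` parses the extracted stream into nested dictionaries, ConfigParser-style. A hedged sketch of just that parsing step (the real `_get_metadata` body is not part of this diff):

```python
from configparser import ConfigParser
import io
from typing import Optional


def parse_metadata_log(data: io.BytesIO) -> Optional[dict]:
    """Illustrative only: turn an INI-style metadata.log stream into the nested
    dict shape asserted by test_get_metadata."""
    parser = ConfigParser(interpolation=None)
    parser.read_string(data.read().decode("utf-8"))
    return {section: dict(parser.items(section)) for section in parser.sections()}


# parse_metadata_log(io.BytesIO(b"[test]\nfoo = bar\n")) == {"test": {"foo": "bar"}}
```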

lib/pbench/test/unit/server/test_datasets_compare.py

Lines changed: 17 additions & 31 deletions

```diff
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import Any, Optional
 
-from pquisby.lib.post_processing import QuisbyProcessing
 import pytest
 import requests
 
@@ -56,46 +55,31 @@ def query_api(
 
         return query_api
 
-    class MockTarball:
-        tarball_path = Path("/dataset/tarball.tar.xz")
-        name = "tarball"
-
-        def filestream(self, _path: str) -> dict[str, Any]:
-            return {"stream": BytesIO(b"CSV_file_as_a_string")}
-
-    def mock_find_dataset(self, dataset) -> MockTarball:
-        # Validate the resource_id
-        Dataset.query(resource_id=dataset)
-        return self.MockTarball()
-
     def test_dataset_not_present(self, query_get_as, monkeypatch):
         monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
 
         query_get_as(["fio_2"], "drb", HTTPStatus.INTERNAL_SERVER_ERROR)
 
     def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
-        def mock_filestream(_path: str) -> dict[str, Any]:
+        def mock_get_inventory(_self, _dataset: str, _path: str) -> dict[str, Any]:
             return {"stream": BytesIO(b"IncorrectData")}
 
-        def mock_compare_csv_to_json(
-            self, benchmark_name, input_type, data_stream
-        ) -> JSON:
-            return {"status": "failed", "exception": "Unsupported Media Type"}
+        class MockQuisby:
+            def compare_csv_to_json(self, _b, _i, _d) -> JSON:
+                return {"status": "failed", "exception": "Unsupported Media Type"}
 
-        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
-        monkeypatch.setattr(CacheManager, "filestream", mock_filestream)
+        monkeypatch.setattr(CacheManager, "get_inventory", mock_get_inventory)
         monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
         monkeypatch.setattr(
-            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
+            "pbench.server.api.resources.datasets_compare.QuisbyProcessing", MockQuisby
         )
         query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)
 
     def test_tarball_unpack_exception(self, query_get_as, monkeypatch):
-        def mock_filestream(path: str) -> dict[str, Any]:
-            raise CacheExtractBadPath(Path("tarball"), path)
+        def mock_get_inventory(_self, _dataset: str, _path: str) -> dict[str, Any]:
+            raise CacheExtractBadPath(Path("tarball"), _path)
 
-        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
-        monkeypatch.setattr(self.MockTarball, "filestream", mock_filestream)
+        monkeypatch.setattr(CacheManager, "get_inventory", mock_get_inventory)
         monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
         query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)
 
@@ -143,15 +127,17 @@ def mock_filestream(path: str) -> dict[str, Any]:
     def test_datasets_with_different_benchmark(
         self, user, datasets, exp_status, exp_message, query_get_as, monkeypatch
     ):
-        def mock_compare_csv_to_json(
-            self, benchmark_name, input_type, data_stream
-        ) -> JSON:
-            return {"status": "success", "json_data": "quisby_data"}
+        class MockQuisby:
+            def compare_csv_to_json(self, _b, _i, _d) -> JSON:
+                return {"status": "success", "json_data": "quisby_data"}
+
+        def mock_get_inventory(_self, _dataset: str, _path: str) -> dict[str, Any]:
+            return {"stream": BytesIO(b"IncorrectData")}
 
-        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
+        monkeypatch.setattr(CacheManager, "get_inventory", mock_get_inventory)
         monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
         monkeypatch.setattr(
-            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
+            "pbench.server.api.resources.datasets_compare.QuisbyProcessing", MockQuisby
         )
 
         response = query_get_as(datasets, user, exp_status)
```
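Rather than patching `compare_csv_to_json` on the imported `QuisbyProcessing` class, these tests now replace the `QuisbyProcessing` name that `datasets_compare` bound at import time, which is why the test module no longer imports it from `pquisby` directly. `monkeypatch.setattr` accepts a dotted-string target for exactly this. A self-contained illustration of the technique against a standard-library name (the pbench tests apply the same form to `"pbench.server.api.resources.datasets_compare.QuisbyProcessing"`):

```python
import os.path


def test_patch_by_dotted_name(monkeypatch):
    """Illustrative only: the string form patches the attribute in the module
    where callers look it up, for the duration of the test."""
    monkeypatch.setattr("os.path.exists", lambda _path: True)
    assert os.path.exists("/definitely/not/a/real/path")
```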
