diff --git a/backend/code_coverage_backend/api.py b/backend/code_coverage_backend/api.py index e407e8664..f972b4c0c 100644 --- a/backend/code_coverage_backend/api.py +++ b/backend/code_coverage_backend/api.py @@ -8,6 +8,7 @@ from code_coverage_backend.config import COVERAGE_EXTENSIONS from code_coverage_backend.gcp import load_cache +from code_coverage_backend.report import DEFAULT_FILTER DEFAULT_REPOSITORY = "mozilla-central" logger = structlog.get_logger(__name__) @@ -39,7 +40,13 @@ def coverage_latest(repository=DEFAULT_REPOSITORY): abort(404) -def coverage_for_path(path="", changeset=None, repository=DEFAULT_REPOSITORY): +def coverage_for_path( + path="", + changeset=None, + repository=DEFAULT_REPOSITORY, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, +): """ Aggregate coverage for a path, regardless of its type: * file, gives its coverage percent @@ -54,17 +61,17 @@ def coverage_for_path(path="", changeset=None, repository=DEFAULT_REPOSITORY): try: if changeset: # Find closest report matching this changeset - changeset, _ = gcp.find_closest_report(repository, changeset) + report = gcp.find_closest_report(repository, changeset, platform, suite) else: # Fallback to latest report - changeset, _ = gcp.find_report(repository) + report = gcp.find_report(repository, platform, suite) except Exception as e: logger.warn("Failed to retrieve report: {}".format(e)) abort(404) # Load tests data from GCP try: - return gcp.get_coverage(repository, changeset, path) + return gcp.get_coverage(report, path) except Exception as e: logger.warn( "Failed to load coverage", @@ -76,7 +83,14 @@ def coverage_for_path(path="", changeset=None, repository=DEFAULT_REPOSITORY): abort(400) -def coverage_history(repository=DEFAULT_REPOSITORY, path="", start=None, end=None): +def coverage_history( + repository=DEFAULT_REPOSITORY, + path="", + start=None, + end=None, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, +): """ List overall coverage from ingested reports over a period of time """ @@ -86,7 +100,7 @@ def coverage_history(repository=DEFAULT_REPOSITORY, path="", start=None, end=Non abort(500) try: - return gcp.get_history(repository, path=path, start=start, end=end) + return gcp.get_history(repository, path, start, end, platform, suite) except Exception as e: logger.warn( "Failed to load history", @@ -97,3 +111,22 @@ def coverage_history(repository=DEFAULT_REPOSITORY, path="", start=None, end=Non error=str(e), ) abort(400) + + +def coverage_filters(repository=DEFAULT_REPOSITORY): + """ + List all available filters for that repository + """ + gcp = load_cache() + if gcp is None: + logger.error("No GCP cache available") + abort(500) + + try: + return { + "platforms": gcp.get_platforms(repository), + "suites": gcp.get_suites(repository), + } + except Exception as e: + logger.warn("Failed to load filters", repo=repository, error=str(e)) + abort(400) diff --git a/backend/code_coverage_backend/api.yml b/backend/code_coverage_backend/api.yml index 0b506d8b5..76e12a8bc 100644 --- a/backend/code_coverage_backend/api.yml +++ b/backend/code_coverage_backend/api.yml @@ -42,6 +42,16 @@ paths: description: Mozilla repository for this changeset (default to mozilla-central) required: false type: string + - name: suite + in: query + description: Filter the coverage using this test suite + required: false + type: string + - name: platform + in: query + description: Filter the coverage using this platform + required: false + type: string responses: 200: description: Code coverage information for a given file or directory at a 
given changeset @@ -72,6 +82,16 @@ paths: description: Path of the repository folder to get coverage info on. required: false type: string + - name: suite + in: query + description: Filter the coverage using this test suite + required: false + type: string + - name: platform + in: query + description: Filter the coverage using this platform + required: false + type: string responses: 200: description: Overall coverage of specified path over a period of time @@ -86,3 +106,18 @@ paths: description: File extensions supported for code coverage tags: - v2 + + /v2/filters: + get: + operationId: "code_coverage_backend.api.coverage_filters" + parameters: + - name: repository + in: query + description: Mozilla repository for these reports (default to mozilla-central) + required: false + type: string + responses: + 200: + description: Available filters on the endpoints + tags: + - v2 diff --git a/backend/code_coverage_backend/gcp.py b/backend/code_coverage_backend/gcp.py index a1660374e..f03326322 100644 --- a/backend/code_coverage_backend/gcp.py +++ b/backend/code_coverage_backend/gcp.py @@ -7,25 +7,27 @@ from datetime import datetime import redis -import requests import structlog import zstandard as zstd from dateutil.relativedelta import relativedelta from code_coverage_backend import covdir from code_coverage_backend import taskcluster +from code_coverage_backend.hgmo import hgmo_pushes +from code_coverage_backend.hgmo import hgmo_revision_details +from code_coverage_backend.report import DEFAULT_FILTER +from code_coverage_backend.report import Report from code_coverage_tools.gcp import get_bucket logger = structlog.get_logger(__name__) __cache = None +__hgmo = {} -KEY_REPORTS = "reports:{repository}" +KEY_REPORTS = "reports:{repository}:{platform}:{suite}" KEY_CHANGESET = "changeset:{repository}:{changeset}" KEY_HISTORY = "history:{repository}" -KEY_OVERALL_COVERAGE = "overall:{repository}:{changeset}" - -HGMO_REVISION_URL = "https://hg.mozilla.org/{repository}/json-rev/{revision}" -HGMO_PUSHES_URL = "https://hg.mozilla.org/{repository}/json-pushes" +KEY_PLATFORMS = "platforms:{repository}" +KEY_SUITES = "suites:{repository}" REPOSITORIES = ("mozilla-central",) @@ -48,18 +50,6 @@ def load_cache(): return __cache -def hgmo_revision_details(repository, changeset): - """ - HGMO helper to retrieve details for a changeset - """ - url = HGMO_REVISION_URL.format(repository=repository, revision=changeset) - resp = requests.get(url) - resp.raise_for_status() - data = resp.json() - assert "pushid" in data, "Missing pushid" - return data["pushid"], data["date"][0] - - class GCPCache(object): """ Cache on Redis GCP results @@ -85,108 +75,114 @@ def __init__(self, reports_dir=None): # Load most recent reports in cache for repo in REPOSITORIES: - for rev, _ in self.list_reports(repo, nb=1): - self.download_report(repo, rev) + for report in self.list_reports(repo, nb=1): + self.download_report(report) - def ingest_pushes(self, repository, min_push_id=None, nb_pages=3): + def ingest_pushes(self, repository, platform, suite, min_push_id=None, nb_pages=3): """ Ingest HGMO changesets and pushes into our Redis Cache The pagination goes from oldest to newest, starting from the optional min_push_id """ - chunk_size = 8 - params = {"version": 2} - if min_push_id is not None: - assert isinstance(min_push_id, int) - params["startID"] = min_push_id - params["endID"] = min_push_id + chunk_size - - for page in range(nb_pages): - - r = requests.get( - HGMO_PUSHES_URL.format(repository=repository), params=params - ) - data 
= r.json() - - # Sort pushes to go from oldest to newest - pushes = sorted( - [(int(push_id), push) for push_id, push in data["pushes"].items()], - key=lambda p: p[0], - ) - if not pushes: - return - - for push_id, push in pushes: - - changesets = push["changesets"] - date = push["date"] - self.store_push(repository, push_id, changesets, date) - - reports = [ - changeset - for changeset in changesets - if self.ingest_report(repository, push_id, changeset, date) - ] - if reports: - logger.info("Found reports in that push", push_id=push_id) - - newest = pushes[-1][0] - params["startID"] = newest - params["endID"] = newest + chunk_size - - def ingest_report(self, repository, push_id, changeset, date): + ingested = False + for push_id, push in hgmo_pushes(repository, min_push_id, nb_pages): + for changeset in push["changesets"]: + report = Report( + self.reports_dir, + repository, + changeset, + platform, + suite, + push_id=push_id, + date=push["date"], + ) + + # Always link changeset to push to find closest available report + self.redis.hmset( + KEY_CHANGESET.format( + repository=report.repository, changeset=report.changeset + ), + {"push": report.push_id, "date": report.date}, + ) + + if not ingested and self.ingest_report(report): + logger.info( + "Found report in that push", push_id=push_id, report=str(report) + ) + + # Only ingest first report found in a push in order to stay below 30s response time + ingested = True + + def ingest_report(self, report): """ When a report exist for a changeset, download it and update redis data """ - assert isinstance(push_id, int) - assert isinstance(date, int) + assert isinstance(report, Report) # Download the report - report_path = self.download_report(repository, changeset) - if not report_path: + if not self.download_report(report): + logger.info("Report not available", report=str(report)) return False # Read overall coverage for history - key = KEY_OVERALL_COVERAGE.format(repository=repository, changeset=changeset) - report = covdir.open_report(report_path) - assert report is not None, "No report to ingest" - overall_coverage = covdir.get_overall_coverage(report) + data = covdir.open_report(report.path) + assert data is not None, "No report to ingest" + overall_coverage = covdir.get_overall_coverage(data) assert len(overall_coverage) > 0, "No overall coverage" - self.redis.hmset(key, overall_coverage) + self.redis.hmset(report.key_overall, overall_coverage) # Add the changeset to the sorted sets of known reports # The numeric push_id is used as a score to keep the ingested # changesets ordered - self.redis.zadd(KEY_REPORTS.format(repository=repository), {changeset: push_id}) + self.redis.zadd( + KEY_REPORTS.format( + repository=report.repository, + platform=report.platform, + suite=report.suite, + ), + {report.changeset: report.push_id}, + ) # Add the changeset to the sorted sets of known reports by date - self.redis.zadd(KEY_HISTORY.format(repository=repository), {changeset: date}) + self.redis.zadd( + KEY_HISTORY.format(repository=report.repository), + {report.changeset: report.date}, + ) - logger.info("Ingested report", changeset=changeset) + # Store the filters + if report.platform != DEFAULT_FILTER: + self.redis.sadd( + KEY_PLATFORMS.format(repository=report.repository), report.platform + ) + if report.suite != DEFAULT_FILTER: + self.redis.sadd( + KEY_SUITES.format(repository=report.repository), report.suite + ) + + logger.info("Ingested report", report=str(report)) return True - def download_report(self, repository, changeset): + def 
download_report(self, report): """ Download and extract a json+zstd covdir report """ + assert isinstance(report, Report) + # Check the report is available on remote storage - path = "{}/{}.json.zstd".format(repository, changeset) - blob = self.bucket.blob(path) + blob = self.bucket.blob(report.gcp_path) if not blob.exists(): - logger.debug("No report found on GCP", path=path) + logger.debug("No report found on GCP", path=report.gcp_path) return False - archive_path = os.path.join(self.reports_dir, blob.name) - json_path = os.path.join(self.reports_dir, blob.name.rstrip(".zstd")) - if os.path.exists(json_path): - logger.info("Report already available", path=json_path) - return json_path + if os.path.exists(report.path): + logger.info("Report already available", path=report.path) + return True - os.makedirs(os.path.dirname(archive_path), exist_ok=True) - blob.download_to_filename(archive_path) - logger.info("Downloaded report archive", path=archive_path) + os.makedirs(os.path.dirname(report.archive_path), exist_ok=True) + blob.download_to_filename(report.archive_path) + logger.info("Downloaded report archive", path=report.archive_path) - with open(json_path, "wb") as output: - with open(archive_path, "rb") as archive: + with open(report.path, "wb") as output: + with open(report.archive_path, "rb") as archive: dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(archive) while True: @@ -195,34 +191,30 @@ def download_report(self, repository, changeset): break output.write(chunk) - os.unlink(archive_path) - logger.info("Decompressed report", path=json_path) - return json_path - - def store_push(self, repository, push_id, changesets, date): - """ - Store a push on redis cache, with its changesets - """ - assert isinstance(push_id, int) - assert isinstance(changesets, list) - - # Store changesets initial data - for changeset in changesets: - key = KEY_CHANGESET.format(repository=repository, changeset=changeset) - self.redis.hmset(key, {"push": push_id, "date": date}) - - logger.info("Stored new push data", push_id=push_id) + os.unlink(report.archive_path) + logger.info("Decompressed report", path=report.path) + return True - def find_report(self, repository, push_range=(MAX_PUSH, MIN_PUSH)): + def find_report( + self, + repository, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, + push_range=(MAX_PUSH, MIN_PUSH), + ): """ Find the first report available before that push """ - results = self.list_reports(repository, nb=1, push_range=push_range) + results = self.list_reports( + repository, platform, suite, nb=1, push_range=push_range + ) if not results: raise Exception("No report found") return results[0] - def find_closest_report(self, repository, changeset): + def find_closest_report( + self, repository, changeset, platform=DEFAULT_FILTER, suite=DEFAULT_FILTER + ): """ Find the closest report from specified changeset: 1. 
Lookup the changeset push in cache @@ -236,18 +228,43 @@ def find_closest_report(self, repository, changeset): if push_id: # Redis lib uses bytes for all output push_id = int(push_id.decode("utf-8")) + date = self.redis.hget(key, "date").decode("utf-8") + + # Check the report variant is available locally + report = Report( + self.reports_dir, + repository, + changeset, + platform, + suite, + push_id=push_id, + date=date, + ) + if not os.path.exists(report.path): + self.ingest_report(report) else: # Lookup push from HGMO (slow) push_id, _ = hgmo_revision_details(repository, changeset) # Ingest pushes as we clearly don't have it in cache - self.ingest_pushes(repository, min_push_id=push_id - 1, nb_pages=1) + self.ingest_pushes( + repository, platform, suite, min_push_id=push_id - 1, nb_pages=1 + ) # Load report from that push - return self.find_report(repository, push_range=(push_id, MAX_PUSH)) + return self.find_report( + repository, platform, suite, push_range=(push_id, MAX_PUSH) + ) - def list_reports(self, repository, nb=5, push_range=(MAX_PUSH, MIN_PUSH)): + def list_reports( + self, + repository, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, + nb=5, + push_range=(MAX_PUSH, MIN_PUSH), + ): """ List the last reports available on the server, ordered by push by default from newer to older @@ -262,7 +279,7 @@ def list_reports(self, repository, nb=5, push_range=(MAX_PUSH, MIN_PUSH)): op = self.redis.zrangebyscore if start < end else self.redis.zrevrangebyscore reports = op( - KEY_REPORTS.format(repository=repository), + KEY_REPORTS.format(repository=repository, platform=platform, suite=suite), start, end, start=0, @@ -270,33 +287,45 @@ def list_reports(self, repository, nb=5, push_range=(MAX_PUSH, MIN_PUSH)): withscores=True, ) - return [(changeset.decode("utf-8"), int(push)) for changeset, push in reports] + return [ + Report( + self.reports_dir, + repository, + changeset.decode("utf-8"), + platform, + suite, + push_id=push, + ) + for changeset, push in reports + ] - def get_coverage(self, repository, changeset, path): + def get_coverage(self, report, path): """ Load a report and its coverage for a specific path and build a serializable representation """ - report_path = os.path.join( - self.reports_dir, "{}/{}.json".format(repository, changeset) - ) - - report = covdir.open_report(report_path) - if report is None: + assert isinstance(report, Report) + data = covdir.open_report(report.path) + if data is None: # Try to download the report if it's missing locally - report_path = self.download_report(repository, changeset) - assert report_path is not False, "Missing report for {} at {}".format( - repository, changeset - ) + assert self.download_report(report), "Missing report {}".format(report) - report = covdir.open_report(report_path) - assert report + data = covdir.open_report(report.path) + assert data - out = covdir.get_path_coverage(report, path) - out["changeset"] = changeset + out = covdir.get_path_coverage(data, path) + out["changeset"] = report.changeset return out - def get_history(self, repository, path="", start=None, end=None): + def get_history( + self, + repository, + path="", + start=None, + end=None, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, + ): """ Load the history overall coverage from the redis cache Default to date range from now back to a year @@ -318,22 +347,36 @@ def get_history(self, repository, path="", start=None, end=None): def _coverage(changeset, date): # Load overall coverage for specified path changeset = changeset.decode("utf-8") - key = 
KEY_OVERALL_COVERAGE.format( - repository=repository, changeset=changeset + + report = Report( + self.reports_dir, repository, changeset, platform, suite, date=date ) - coverage = self.redis.hget(key, path) + coverage = self.redis.hget(report.key_overall, path) if coverage is not None: coverage = float(coverage) return {"changeset": changeset, "date": int(date), "coverage": coverage} return [_coverage(changeset, date) for changeset, date in history] + def get_platforms(self, repository): + """List all available platforms for a repository""" + platforms = self.redis.smembers(KEY_PLATFORMS.format(repository=repository)) + return sorted(map(lambda x: x.decode("utf-8"), platforms)) + + def get_suites(self, repository): + """List all available suites for a repository""" + suites = self.redis.smembers(KEY_SUITES.format(repository=repository)) + return sorted(map(lambda x: x.decode("utf-8"), suites)) + def ingest_available_reports(self, repository): """ Ingest all the available reports for a repository """ assert isinstance(repository, str) - REGEX_BLOB = re.compile(r"^{}/(\w+).json.zstd$".format(repository)) + + REGEX_BLOB = re.compile( + r"^{}/(\w+)/([\w\-]+):([\w\-]+).json.zstd$".format(repository) + ) for blob in self.bucket.list_blobs(prefix=repository): # Get changeset from blob name @@ -342,10 +385,9 @@ def ingest_available_reports(self, repository): logger.warn("Invalid blob found {}".format(blob.name)) continue changeset = match.group(1) + platform = match.group(2) + suite = match.group(3) - # Get extra information from HGMO - push_id, date = hgmo_revision_details(repository, changeset) - logger.info("Found report", changeset=changeset, push=push_id) - - # Ingest report - self.ingest_report(repository, push_id, changeset, int(date)) + # Build report instance and ingest it + report = Report(self.reports_dir, repository, changeset, platform, suite) + self.ingest_report(report) diff --git a/backend/code_coverage_backend/hgmo.py b/backend/code_coverage_backend/hgmo.py new file mode 100644 index 000000000..de9d4fcee --- /dev/null +++ b/backend/code_coverage_backend/hgmo.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+import requests
+import structlog
+
+logger = structlog.get_logger(__name__)
+
+__hgmo = {}
+
+HGMO_REVISION_URL = "https://hg.mozilla.org/{repository}/json-rev/{revision}"
+HGMO_PUSHES_URL = "https://hg.mozilla.org/{repository}/json-pushes"
+
+
+def hgmo_revision_details(repository, changeset):
+    """
+    HGMO helper to retrieve details for a changeset
+    """
+    # Check cache first
+    key = (repository, changeset)
+    if key in __hgmo:
+        return __hgmo[key]
+
+    url = HGMO_REVISION_URL.format(repository=repository, revision=changeset)
+    resp = requests.get(url)
+    resp.raise_for_status()
+    data = resp.json()
+    assert "pushid" in data, "Missing pushid"
+    out = data["pushid"], data["date"][0]
+
+    # Store in cache
+    __hgmo[key] = out
+    return out
+
+
+def hgmo_pushes(repository, min_push_id, nb_pages, chunk_size=8):
+    """
+    HGMO helper to list all pushes in a limited number of pages
+    """
+    params = {"version": 2}
+    if min_push_id is not None:
+        assert isinstance(min_push_id, int)
+        params["startID"] = min_push_id
+        params["endID"] = min_push_id + chunk_size
+
+    for page in range(nb_pages):
+        r = requests.get(HGMO_PUSHES_URL.format(repository=repository), params=params)
+        data = r.json()
+
+        # Sort pushes to go from oldest to newest
+        pushes = sorted(
+            [(int(push_id), push) for push_id, push in data["pushes"].items()],
+            key=lambda p: p[0],
+        )
+        if not pushes:
+            return
+
+        for push in pushes:
+            yield push
+
+        newest = pushes[-1][0]
+        params["startID"] = newest
+        params["endID"] = newest + chunk_size
diff --git a/backend/code_coverage_backend/report.py b/backend/code_coverage_backend/report.py
new file mode 100644
index 000000000..bdbcce6f0
--- /dev/null
+++ b/backend/code_coverage_backend/report.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +import os + +import structlog + +from code_coverage_backend.hgmo import hgmo_revision_details + +logger = structlog.get_logger(__name__) + +DEFAULT_FILTER = "all" + + +class Report(object): + """ + A single coverage report + """ + + def __init__( + self, + base_dir, + repository, + changeset, + platform=DEFAULT_FILTER, + suite=DEFAULT_FILTER, + push_id=None, + date=None, + ): + assert isinstance(repository, str) + assert isinstance(changeset, str) + self.base_dir = base_dir + self.repository = repository + self.changeset = changeset + self.platform = platform + self.suite = suite + + # Get extra information from HGMO + if push_id or date: + self.push_id = push_id + self.date = date + else: + self.push_id, date = hgmo_revision_details(repository, changeset) + self.date = int(date) + + def __str__(self): + return self.name + + def __repr__(self): + return self.name + + def __eq__(self, other): + + return isinstance(other, Report) and ( + self.base_dir, + self.repository, + self.changeset, + self.platform, + self.suite, + ) == ( + other.base_dir, + other.repository, + other.changeset, + other.platform, + other.suite, + ) + + @property + def name(self): + return "{}/{}/{}:{}".format( + self.repository, self.changeset, self.platform, self.suite + ) + + @property + def path(self): + """Local path on FS, decompressed""" + return os.path.join(self.base_dir, f"{self.name}.json") + + @property + def archive_path(self): + """Local path on FS, compressed""" + return f"{self.path}.zstd" + + @property + def gcp_path(self): + """Remote path on GCP storage""" + return f"{self.name}.json.zstd" + + @property + def key_overall(self): + """Redis key to store the overall coverage data for this report""" + platform = self.platform or "all" + suite = self.suite or "all" + return f"overall:{self.repository}:{self.changeset}:{platform}:{suite}" diff --git a/backend/tests/test_gcp.py b/backend/tests/test_gcp.py index 9ed9fac1b..0a22e78cf 100644 --- a/backend/tests/test_gcp.py +++ b/backend/tests/test_gcp.py @@ -6,44 +6,33 @@ import pytest - -def test_store_push(mock_cache): - """ - Test base method to store a push & changesets on redis - """ - assert mock_cache.redis.keys("*") == [] - mock_cache.store_push("myrepo", 1234, ["deadbeef", "coffee"], 111222333) - - assert mock_cache.redis.keys("*") == [ - b"changeset:myrepo:deadbeef", - b"changeset:myrepo:coffee", - ] - assert mock_cache.redis.hgetall("changeset:myrepo:deadbeef") == { - b"push": b"1234", - b"date": b"111222333", - } - assert mock_cache.redis.hgetall("changeset:myrepo:coffee") == { - b"push": b"1234", - b"date": b"111222333", - } +from code_coverage_backend.report import Report def test_download_report(mock_cache): """ Test base method to download a report & store it on local FS """ - mock_cache.bucket.add_mock_blob("myrepo/deadbeef123.json.zstd") + mock_cache.bucket.add_mock_blob("myrepo/deadbeef123/all:all.json.zstd") # Does not exist - assert mock_cache.download_report("myrepo", "missing") is False + report = Report(mock_cache.reports_dir, "myrepo", "missing", date=1, push_id=1) + assert mock_cache.download_report(report) is False - archive = os.path.join(mock_cache.reports_dir, "myrepo", "deadbeef123.json.zstd") - payload = os.path.join(mock_cache.reports_dir, "myrepo", "deadbeef123.json") + archive = os.path.join( + mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json.zstd" + ) + payload = os.path.join( + mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json" + ) assert not os.path.exists(archive) assert not 
os.path.exists(payload) # Valid blob - assert mock_cache.download_report("myrepo", "deadbeef123") == payload + report = Report(mock_cache.reports_dir, "myrepo", "deadbeef123", date=1, push_id=1) + assert mock_cache.download_report(report) is True + assert archive == report.archive_path + assert payload == report.path # Only the payload remains after download assert not os.path.exists(archive) @@ -51,15 +40,17 @@ def test_download_report(mock_cache): assert json.load(open(payload)) == {"children": {}, "coveragePercent": 0.0} + assert mock_cache.redis.keys("*") == [] + def test_ingestion(mock_cache): """ Test ingestion of several reports and their retrieval through Redis index """ # Setup blobs - mock_cache.bucket.add_mock_blob("myrepo/rev1.json.zstd", coverage=0.1) - mock_cache.bucket.add_mock_blob("myrepo/rev2.json.zstd", coverage=0.2) - mock_cache.bucket.add_mock_blob("myrepo/rev10.json.zstd", coverage=1.0) + mock_cache.bucket.add_mock_blob("myrepo/rev1/all:all.json.zstd", coverage=0.1) + mock_cache.bucket.add_mock_blob("myrepo/rev2/all:all.json.zstd", coverage=0.2) + mock_cache.bucket.add_mock_blob("myrepo/rev10/all:all.json.zstd", coverage=1.0) # No reports at first assert mock_cache.redis.zcard(b"reports:myrepo") == 0 @@ -67,25 +58,30 @@ def test_ingestion(mock_cache): assert mock_cache.list_reports("myrepo") == [] # Ingest those 3 reports - mock_cache.ingest_report("myrepo", 1, "rev1", 1000) - mock_cache.ingest_report("myrepo", 2, "rev2", 2000) - mock_cache.ingest_report("myrepo", 10, "rev10", 9000) + report_1 = Report(mock_cache.reports_dir, "myrepo", "rev1", date=1000, push_id=1) + report_2 = Report(mock_cache.reports_dir, "myrepo", "rev2", date=2000, push_id=2) + report_10 = Report(mock_cache.reports_dir, "myrepo", "rev10", date=9000, push_id=10) + mock_cache.ingest_report(report_1) + mock_cache.ingest_report(report_2) + mock_cache.ingest_report(report_10) # They must be in redis and on the file system - assert mock_cache.redis.zcard(b"reports:myrepo") == 3 + assert mock_cache.redis.zcard(b"reports:myrepo:all:all") == 3 assert mock_cache.redis.zcard(b"history:myrepo") == 3 - assert os.path.exists(os.path.join(mock_cache.reports_dir, "myrepo", "rev1.json")) - assert os.path.exists(os.path.join(mock_cache.reports_dir, "myrepo", "rev2.json")) - assert os.path.exists(os.path.join(mock_cache.reports_dir, "myrepo", "rev10.json")) + assert os.path.exists( + os.path.join(mock_cache.reports_dir, "myrepo", "rev1", "all:all.json") + ) + assert os.path.exists( + os.path.join(mock_cache.reports_dir, "myrepo", "rev2", "all:all.json") + ) + assert os.path.exists( + os.path.join(mock_cache.reports_dir, "myrepo", "rev10", "all:all.json") + ) # Reports are exposed, and sorted by push assert mock_cache.list_reports("another") == [] - assert mock_cache.list_reports("myrepo") == [ - ("rev10", 10), - ("rev2", 2), - ("rev1", 1), - ] - assert mock_cache.find_report("myrepo") == ("rev10", 10) + assert mock_cache.list_reports("myrepo") == [report_10, report_2, report_1] + assert mock_cache.find_report("myrepo") == report_10 assert mock_cache.get_history("myrepo", start=200, end=20000) == [ {"changeset": "rev10", "coverage": 1.0, "date": 9000}, {"changeset": "rev2", "coverage": 0.2, "date": 2000}, @@ -93,16 +89,17 @@ def test_ingestion(mock_cache): ] # Even if we add a smaller one later on, reports are still sorted - mock_cache.bucket.add_mock_blob("myrepo/rev5.json.zstd", coverage=0.5) - mock_cache.ingest_report("myrepo", 5, "rev5", 5000) + mock_cache.bucket.add_mock_blob("myrepo/rev5/all:all.json.zstd", 
coverage=0.5) + report_5 = Report(mock_cache.reports_dir, "myrepo", "rev5", date=5000, push_id=5) + mock_cache.ingest_report(report_5) assert mock_cache.list_reports("myrepo") == [ - ("rev10", 10), - ("rev5", 5), - ("rev2", 2), - ("rev1", 1), + report_10, + report_5, + report_2, + report_1, ] - assert mock_cache.find_report("myrepo") == ("rev10", 10) - assert mock_cache.find_report("myrepo", push_range=(7, 0)) == ("rev5", 5) + assert mock_cache.find_report("myrepo") == report_10 + assert mock_cache.find_report("myrepo", push_range=(7, 0)) == report_5 assert mock_cache.get_history("myrepo", start=200, end=20000) == [ {"changeset": "rev10", "coverage": 1.0, "date": 9000}, {"changeset": "rev5", "coverage": 0.5, "date": 5000}, @@ -118,14 +115,18 @@ def test_ingest_hgmo(mock_cache, mock_hgmo): # Add a report on push 995 rev = hashlib.md5(b"995").hexdigest() - mock_cache.bucket.add_mock_blob("myrepo/{}.json.zstd".format(rev), coverage=0.5) + mock_cache.bucket.add_mock_blob( + "myrepo/{}/all:all.json.zstd".format(rev), coverage=0.5 + ) # Ingest last pushes assert mock_cache.list_reports("myrepo") == [] assert len(mock_cache.redis.keys("changeset:myrepo:*")) == 0 - mock_cache.ingest_pushes("myrepo") + mock_cache.ingest_pushes("myrepo", "all", "all") assert len(mock_cache.redis.keys("changeset:myrepo:*")) > 0 - assert mock_cache.list_reports("myrepo") == [(rev, 995)] + assert mock_cache.list_reports("myrepo") == [ + Report(mock_cache.reports_dir, "myrepo", rev, push_id=1, date=995) + ] def test_closest_report(mock_cache, mock_hgmo): @@ -150,34 +151,38 @@ def test_closest_report(mock_cache, mock_hgmo): # Add a report on 994, 2 pushes after our revision report_rev = hashlib.md5(b"994").hexdigest() mock_cache.bucket.add_mock_blob( - "myrepo/{}.json.zstd".format(report_rev), coverage=0.5 + "myrepo/{}/all:all.json.zstd".format(report_rev), coverage=0.5 + ) + report_994 = Report( + mock_cache.reports_dir, "myrepo", report_rev, push_id=1, date=994 ) # Add a report on 990, 2 pushes before our revision base_rev = hashlib.md5(b"990").hexdigest() mock_cache.bucket.add_mock_blob( - "myrepo/{}.json.zstd".format(base_rev), coverage=0.4 + "myrepo/{}/all:all.json.zstd".format(base_rev), coverage=0.4 ) + report_990 = Report(mock_cache.reports_dir, "myrepo", base_rev, push_id=1, date=990) # Now we have a report ! assert mock_cache.list_reports("myrepo") == [] - assert mock_cache.find_closest_report("myrepo", revision) == (report_rev, 994) - assert mock_cache.list_reports("myrepo") == [(report_rev, 994)] + assert mock_cache.find_closest_report("myrepo", revision) == report_994 + assert mock_cache.list_reports("myrepo") == [report_994] # This should also work for revisions before revision = "991{}".format(uuid.uuid4().hex[3:]) - assert mock_cache.find_closest_report("myrepo", revision) == (report_rev, 994) + assert mock_cache.find_closest_report("myrepo", revision) == report_994 # ... 
and the revision on the push itself revision = "994{}".format(uuid.uuid4().hex[3:]) - assert mock_cache.find_closest_report("myrepo", revision) == (report_rev, 994) + assert mock_cache.find_closest_report("myrepo", revision) == report_994 # We can also retrieve the base revision revision = "990{}".format(uuid.uuid4().hex[3:]) - assert mock_cache.find_closest_report("myrepo", revision) == (base_rev, 990) + assert mock_cache.find_closest_report("myrepo", revision) == report_990 revision = "989{}".format(uuid.uuid4().hex[3:]) - assert mock_cache.find_closest_report("myrepo", revision) == (base_rev, 990) - assert mock_cache.list_reports("myrepo") == [(report_rev, 994), (base_rev, 990)] + assert mock_cache.find_closest_report("myrepo", revision) == report_990 + assert mock_cache.list_reports("myrepo") == [report_994, report_990] # But not for revisions after the push revision = "995{}".format(uuid.uuid4().hex[3:]) @@ -191,15 +196,16 @@ def test_get_coverage(mock_cache): Test coverage access with re-download """ # No report at first + report = Report(mock_cache.reports_dir, "myrepo", "myhash", push_id=1, date=1) with pytest.raises(AssertionError) as e: - mock_cache.get_coverage("myrepo", "myhash", "") - assert str(e.value) == "Missing report for myrepo at myhash" + mock_cache.get_coverage(report, "") + assert str(e.value) == "Missing report myrepo/myhash/all:all" # Report available online - mock_cache.bucket.add_mock_blob("myrepo/myhash.json.zstd") + mock_cache.bucket.add_mock_blob("myrepo/myhash/all:all.json.zstd") # Coverage available - coverage = mock_cache.get_coverage("myrepo", "myhash", "") + coverage = mock_cache.get_coverage(report, "") assert coverage == { "children": [], "coveragePercent": 0.0, @@ -209,12 +215,12 @@ def test_get_coverage(mock_cache): } # Remove local file - path = os.path.join(mock_cache.reports_dir, "myrepo", "myhash.json") + path = os.path.join(mock_cache.reports_dir, "myrepo", "myhash", "all:all.json") assert os.path.exists(path) os.unlink(path) # Coverage still available - coverage = mock_cache.get_coverage("myrepo", "myhash", "") + coverage = mock_cache.get_coverage(report, "") assert coverage == { "children": [], "coveragePercent": 0.0, @@ -229,7 +235,7 @@ def test_get_coverage(mock_cache): f.write("break") # Coverage still available - coverage = mock_cache.get_coverage("myrepo", "myhash", "") + coverage = mock_cache.get_coverage(report, "") assert coverage == { "children": [], "coveragePercent": 0.0,
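
Illustrative sketch (not part of the patch): the new Report class introduced in report.py centralizes how a filtered coverage report is named and where it is stored, locally and on GCP. The snippet below assumes a hypothetical /tmp/reports base directory and passes stub push_id/date values so the constructor skips the HGMO lookup; the printed values follow the name, path, archive_path, gcp_path and key_overall properties defined above.

from code_coverage_backend.report import Report

# Build a report descriptor for a platform/suite-filtered covdir report.
# push_id and date are stubbed so no network call to hg.mozilla.org is made.
report = Report(
    "/tmp/reports", "mozilla-central", "deadbeef123",
    platform="linux", suite="mochitest",
    push_id=1, date=1000,
)

print(report.name)          # mozilla-central/deadbeef123/linux:mochitest
print(report.path)          # /tmp/reports/mozilla-central/deadbeef123/linux:mochitest.json
print(report.archive_path)  # /tmp/reports/mozilla-central/deadbeef123/linux:mochitest.json.zstd
print(report.gcp_path)      # mozilla-central/deadbeef123/linux:mochitest.json.zstd
print(report.key_overall)   # overall:mozilla-central:deadbeef123:linux:mochitest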
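
The API changes expose the same filters over HTTP: platform and suite become optional query parameters on the coverage endpoints, and /v2/filters lists the values ingested for a repository. A minimal client-side sketch, assuming a hypothetical deployment URL and assuming the path-coverage route is /v2/path (the route names themselves are not visible in this hunk):

import requests

BASE = "https://coverage.example.com"  # hypothetical backend URL

# List the platforms and suites available for mozilla-central
filters = requests.get(
    BASE + "/v2/filters", params={"repository": "mozilla-central"}
).json()
print(filters["platforms"], filters["suites"])

# Coverage for a directory, restricted to one platform/suite combination
coverage = requests.get(
    BASE + "/v2/path",
    params={"path": "dom/", "platform": "linux", "suite": "mochitest"},
).json()
print(coverage["coveragePercent"])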