bot: build detailed reports for test suites & platforms #144

Merged: 8 commits, Sep 4, 2019
Changes from all commits
62 changes: 52 additions & 10 deletions bot/code_coverage_bot/artifacts.py
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
import collections
import fnmatch
import itertools
import os
import time

@@ -11,6 +13,9 @@
logger = structlog.get_logger(__name__)


Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")


SUITES_TO_IGNORE = [
"awsy",
"talos",
@@ -25,41 +30,74 @@ def __init__(self, task_ids, parent_dir="ccov-artifacts", task_name_filter="*"):
self.task_ids = task_ids
self.parent_dir = parent_dir
self.task_name_filter = task_name_filter
self.artifacts = []

def generate_path(self, platform, chunk, artifact):
file_name = "%s_%s_%s" % (platform, chunk, os.path.basename(artifact["name"]))
return os.path.join(self.parent_dir, file_name)

def get_chunks(self, platform):
return set(
-            f.split("_")[1]
-            for f in os.listdir(self.parent_dir)
-            if os.path.basename(f).startswith(f"{platform}_")
artifact.chunk
for artifact in self.artifacts
if artifact.platform == platform
)

-    def get(self, platform=None, suite=None, chunk=None):
-        files = os.listdir(self.parent_dir)
def get_combinations(self):
# Add the full report
out = collections.defaultdict(list)
out[("all", "all")] = [artifact.path for artifact in self.artifacts]

# Group by suite first
suites = itertools.groupby(
sorted(self.artifacts, key=lambda a: a.suite), lambda a: a.suite
)
for suite, artifacts in suites:
artifacts = list(artifacts)

# List all available platforms
platforms = {a.platform for a in artifacts}
platforms.add("all")

# And list all possible permutations with suite + platform
out[("all", suite)] += [artifact.path for artifact in artifacts]
for platform in platforms:
if platform != "all":
out[(platform, "all")] += [
artifact.path
for artifact in artifacts
if artifact.platform == platform
]
out[(platform, suite)] = [
artifact.path
for artifact in artifacts
if platform == "all" or artifact.platform == platform
]

return out

def get(self, platform=None, suite=None, chunk=None):
if suite is not None and chunk is not None:
raise Exception("suite and chunk can't both have a value")

# Filter artifacts according to platform, suite and chunk.
filtered_files = []
-        for fname in files:
-            if platform is not None and not fname.startswith("%s_" % platform):
for artifact in self.artifacts:
if platform is not None and artifact.platform != platform:
continue

-            if suite is not None and suite not in fname:
if suite is not None and artifact.suite != suite:
continue

if chunk is not None and ("%s_code-coverage" % chunk) not in fname:
if chunk is not None and artifact.chunk != chunk:
continue

-            filtered_files.append(os.path.join(self.parent_dir, fname))
filtered_files.append(artifact.path)

return filtered_files

def download(self, test_task):
suite = taskcluster.get_suite(test_task["task"])
chunk_name = taskcluster.get_chunk(test_task["task"])
platform_name = taskcluster.get_platform(test_task["task"])
test_task_id = test_task["status"]["taskId"]
@@ -75,6 +113,10 @@ def download(self, test_task):
taskcluster.download_artifact(artifact_path, test_task_id, artifact["name"])
logger.info("%s artifact downloaded" % artifact_path)

self.artifacts.append(
Artifact(artifact_path, test_task_id, platform_name, suite, chunk_name)
)

def is_filtered_task(self, task):
"""
Apply name filter from CLI args on task name
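
To make the new grouping concrete, here is a self-contained sketch of the logic behind get_combinations, run on a few hypothetical artifacts. The paths, task ids, suites, and chunks below are invented for illustration; in the bot they come from download and generate_path.

import collections
import itertools

Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")

# Hypothetical downloaded artifacts (names and task ids are made up).
artifacts = [
    Artifact("ccov-artifacts/linux_mochitest-1_grcov.zip", "task1", "linux", "mochitest", "mochitest-1"),
    Artifact("ccov-artifacts/linux_xpcshell-1_grcov.zip", "task2", "linux", "xpcshell", "xpcshell-1"),
    Artifact("ccov-artifacts/windows_mochitest-1_grcov.zip", "task3", "windows", "mochitest", "mochitest-1"),
]

# Same grouping logic as ArtifactsHandler.get_combinations above.
out = collections.defaultdict(list)
out[("all", "all")] = [a.path for a in artifacts]

suites = itertools.groupby(sorted(artifacts, key=lambda a: a.suite), lambda a: a.suite)
for suite, group in suites:
    group = list(group)
    platforms = {a.platform for a in group}
    platforms.add("all")
    out[("all", suite)] += [a.path for a in group]
    for platform in platforms:
        if platform != "all":
            out[(platform, "all")] += [a.path for a in group if a.platform == platform]
        out[(platform, suite)] = [
            a.path for a in group if platform == "all" or a.platform == platform
        ]

for (platform, suite), paths in sorted(out.items()):
    print(platform, suite, len(paths))
# all all 3            (the full report)
# all mochitest 2      (one suite across every platform)
# all xpcshell 1
# linux all 2          (one platform across every suite)
# linux mochitest 1
# linux xpcshell 1
# windows all 1
# windows mochitest 1

Each key of the resulting dict is a (platform, suite) pair that later becomes one covdir report.
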
92 changes: 71 additions & 21 deletions bot/code_coverage_bot/codecov.py
@@ -42,6 +42,7 @@ def __init__(self, repository, revision, task_name_filter, cache_root):

temp_dir = tempfile.mkdtemp()
self.artifacts_dir = os.path.join(temp_dir, "ccov-artifacts")
self.reports_dir = os.path.join(temp_dir, "ccov-reports")

self.index_service = taskcluster_config.get_service("index")

@@ -118,27 +119,56 @@ def retrieve_source_and_artifacts(self):
# Thread 2 - Clone repository.
executor.submit(self.clone_repository, self.repository, self.revision)

-    def generate_covdir(self):
def build_reports(self, only=None):
"""
-        Build the covdir report using current artifacts
Build all the possible covdir reports using current artifacts
"""
-        output = grcov.report(
-            self.artifactsHandler.get(), source_dir=self.repo_dir, out_format="covdir"
-        )
-        logger.info("Covdir report generated successfully")
-        return json.loads(output)
os.makedirs(self.reports_dir, exist_ok=True)

-    # This function is executed when the bot is triggered at the end of a mozilla-central build.
-    def go_from_trigger_mozilla_central(self):
-        # Check the covdir report does not already exists
-        if uploader.gcp_covdir_exists(self.branch, self.revision):
-            logger.warn("Covdir report already on GCP")
-            return
reports = {}
for (
(platform, suite),
artifacts,
) in self.artifactsHandler.get_combinations().items():

-        self.retrieve_source_and_artifacts()
if only is not None and (platform, suite) not in only:
continue

# Generate covdir report for that suite & platform
logger.info(
"Building covdir suite report",
suite=suite,
platform=platform,
artifacts=len(artifacts),
)
output = grcov.report(
artifacts, source_dir=self.repo_dir, out_format="covdir"
)

# Write output on FS
path = os.path.join(self.reports_dir, f"{platform}.{suite}.json")
with open(path, "wb") as f:
f.write(output)

-        # Check that all JavaScript files present in the coverage artifacts actually exist.
-        # If they don't, there might be a bug in the LCOV rewriter.
reports[(platform, suite)] = path

return reports

def upload_reports(self, reports):
"""
Upload all provided covdir reports on GCP
"""
for (platform, suite), path in reports.items():
report = open(path, "rb").read()
uploader.gcp(
self.branch, self.revision, report, suite=suite, platform=platform
)

def check_javascript_files(self):
"""
Check that all JavaScript files present in the coverage artifacts actually exist.
If they don't, there might be a bug in the LCOV rewriter.
"""
for artifact in self.artifactsHandler.get():
if "jsvm" not in artifact:
continue
@@ -161,7 +191,24 @@ def go_from_trigger_mozilla_central(self):
f"{missing_files} are present in coverage reports, but missing from the repository"
)

-        report = self.generate_covdir()
# This function is executed when the bot is triggered at the end of a mozilla-central build.
def go_from_trigger_mozilla_central(self):
        # Check the covdir report does not already exist
if uploader.gcp_covdir_exists(self.branch, self.revision, "all", "all"):
logger.warn("Full covdir report already on GCP")
return

self.retrieve_source_and_artifacts()

self.check_javascript_files()

reports = self.build_reports()
logger.info("Built all covdir reports", nb=len(reports))

# Retrieve the full report
full_path = reports.get(("all", "all"))
assert full_path is not None, "Missing full report (all:all)"
report = json.load(open(full_path))

paths = uploader.covdir_paths(report)
expected_extensions = [".js", ".cpp"]
@@ -170,6 +217,9 @@ def go_from_trigger_mozilla_central(self):
path.endswith(extension) for path in paths
), "No {} file in the generated report".format(extension)

self.upload_reports(reports)
logger.info("Uploaded all covdir reports", nb=len(reports))

# Get pushlog and ask the backend to generate the coverage by changeset
# data, which will be cached.
with hgmo.HGMO(self.repo_dir) as hgmo_server:
@@ -179,9 +229,6 @@ def go_from_trigger_try(self):
phabricatorUploader = PhabricatorUploader(self.repo_dir, self.revision)
changesets_coverage = phabricatorUploader.upload(report, changesets)

-        uploader.gcp(self.branch, self.revision, report)
-
-        logger.info("Build uploaded on GCP")
notify_email(self.revision, changesets, changesets_coverage)

# This function is executed when the bot is triggered at the end of a try build.
@@ -201,7 +248,10 @@ def go_from_trigger_try(self):

self.retrieve_source_and_artifacts()

-        report = self.generate_covdir()
reports = self.build_reports(only=[("all", "all")])
full_path = reports.get(("all", "all"))
assert full_path is not None, "Missing full report (all:all)"
report = json.load(open(full_path))

logger.info("Upload changeset coverage data to Phabricator")
phabricatorUploader.upload(report, changesets)
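
The report naming scheme and the only filter deserve a quick illustration. Below is a minimal runnable sketch of the same pattern as build_reports above, with stand-in covdir payloads instead of real grcov.report output and a temporary directory instead of self.reports_dir.

import json
import os
import tempfile

# Stand-ins for grcov.report output, keyed like get_combinations results.
outputs = {
    ("all", "all"): b'{"name": "", "coveragePercent": 83.1, "children": {}}',
    ("linux", "xpcshell"): b'{"name": "", "coveragePercent": 79.4, "children": {}}',
}

def build_reports(reports_dir, only=None):
    # One {platform}.{suite}.json file per requested combination.
    os.makedirs(reports_dir, exist_ok=True)
    reports = {}
    for (platform, suite), output in outputs.items():
        if only is not None and (platform, suite) not in only:
            continue
        path = os.path.join(reports_dir, f"{platform}.{suite}.json")
        with open(path, "wb") as f:
            f.write(output)
        reports[(platform, suite)] = path
    return reports

reports_dir = tempfile.mkdtemp()

# try builds only need the full report for the Phabricator upload:
reports = build_reports(reports_dir, only=[("all", "all")])
assert set(reports) == {("all", "all")}

with open(reports[("all", "all")]) as f:
    report = json.load(f)
print(report["coveragePercent"])  # 83.1

Writing each report to disk and passing paths around keeps memory bounded even when dozens of (platform, suite) combinations are built in one run.
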
35 changes: 25 additions & 10 deletions bot/code_coverage_bot/uploader.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import itertools
-import json
import os.path

import requests
@@ -12,25 +11,29 @@
from code_coverage_tools.gcp import get_bucket

logger = structlog.get_logger(__name__)
GCP_COVDIR_PATH = "{repository}/{revision}.json.zstd"
GCP_COVDIR_PATH = "{repository}/{revision}/{platform}:{suite}.json.zstd"


-def gcp(repository, revision, report):
def gcp(repository, revision, report, platform, suite):
"""
Upload a grcov raw report on Google Cloud Storage
* Compress with zstandard
* Upload on bucket using revision in name
* Trigger ingestion on channel's backend
"""
-    assert isinstance(report, dict)
assert isinstance(report, bytes)
assert isinstance(platform, str)
assert isinstance(suite, str)
bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

# Compress report
compressor = zstd.ZstdCompressor()
-    archive = compressor.compress(json.dumps(report).encode("utf-8"))
archive = compressor.compress(report)

# Upload archive
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
path = GCP_COVDIR_PATH.format(
repository=repository, revision=revision, platform=platform, suite=suite
)
blob = bucket.blob(path)
blob.upload_from_string(archive)

@@ -42,35 +45,47 @@ def gcp(repository, revision, report):
logger.info("Uploaded {} on {}".format(path, bucket))

# Trigger ingestion on backend
-    retry(lambda: gcp_ingest(repository, revision), retries=10, wait_between_retries=60)
retry(
lambda: gcp_ingest(repository, revision, platform, suite),
retries=10,
wait_between_retries=60,
)

return blob


-def gcp_covdir_exists(repository, revision):
def gcp_covdir_exists(repository, revision, platform, suite):
"""
Check if a covdir report exists on the Google Cloud Storage bucket
"""
bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
path = GCP_COVDIR_PATH.format(
repository=repository, revision=revision, platform=platform, suite=suite
)
blob = bucket.blob(path)
return blob.exists()


-def gcp_ingest(repository, revision):
def gcp_ingest(repository, revision, platform, suite):
"""
    The GCP report ingestion is triggered remotely on a backend
    by making a simple HTTP request on the /v2/path endpoint.
    By specifying the exact new revision processed, the backend
    will automatically download the new report.
"""
params = {"repository": repository, "changeset": revision}
if platform:
params["platform"] = platform
if suite:
params["suite"] = suite
backend_host = secrets[secrets.BACKEND_HOST]
logger.info(
"Ingesting report on backend",
host=backend_host,
repository=repository,
revision=revision,
platform=platform,
suite=suite,
)
resp = requests.get("{}/v2/path".format(backend_host), params=params)
resp.raise_for_status()
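
To close, a small sketch of the new bucket layout and ingestion parameters. The repository and revision values are invented, and the import alias for zstandard is assumed to match the bot's usage of zstd.ZstdCompressor above.

import zstandard as zstd

GCP_COVDIR_PATH = "{repository}/{revision}/{platform}:{suite}.json.zstd"

# Where a linux/xpcshell report for a made-up revision would land in the bucket:
path = GCP_COVDIR_PATH.format(
    repository="mozilla-central",
    revision="abcdef123456",
    platform="linux",
    suite="xpcshell",
)
print(path)  # mozilla-central/abcdef123456/linux:xpcshell.json.zstd

# gcp() now receives the raw covdir bytes and compresses them as-is,
# instead of json.dumps-ing a dict first:
report = b'{"name": "", "coveragePercent": 83.1, "children": {}}'
archive = zstd.ZstdCompressor().compress(report)

# gcp_ingest() then asks the backend to pull that exact report:
params = {"repository": "mozilla-central", "changeset": "abcdef123456"}
platform, suite = "linux", "xpcshell"
if platform:
    params["platform"] = platform
if suite:
    params["suite"] = suite
# GET {backend_host}/v2/path?repository=...&changeset=...&platform=linux&suite=xpcshell

Keeping platform and suite in both the object name and the ingestion query lets the backend address any single report without listing the bucket.
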