diff --git a/bot/code_coverage_bot/artifacts.py b/bot/code_coverage_bot/artifacts.py
index 51019cbef..57fe7dfcd 100644
--- a/bot/code_coverage_bot/artifacts.py
+++ b/bot/code_coverage_bot/artifacts.py
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
+import collections
 import fnmatch
+import itertools
 import os
 import time
@@ -11,6 +13,9 @@
 logger = structlog.get_logger(__name__)


+Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")
+
+
 SUITES_TO_IGNORE = [
     "awsy",
     "talos",
@@ -25,6 +30,7 @@ def __init__(self, task_ids, parent_dir="ccov-artifacts", task_name_filter="*"):
         self.task_ids = task_ids
         self.parent_dir = parent_dir
         self.task_name_filter = task_name_filter
+        self.artifacts = []

     def generate_path(self, platform, chunk, artifact):
         file_name = "%s_%s_%s" % (platform, chunk, os.path.basename(artifact["name"]))
@@ -32,34 +38,66 @@ def generate_path(self, platform, chunk, artifact):

     def get_chunks(self, platform):
         return set(
-            f.split("_")[1]
-            for f in os.listdir(self.parent_dir)
-            if os.path.basename(f).startswith(f"{platform}_")
+            artifact.chunk
+            for artifact in self.artifacts
+            if artifact.platform == platform
         )

-    def get(self, platform=None, suite=None, chunk=None):
-        files = os.listdir(self.parent_dir)
+    def get_combinations(self):
+        # Add the full report
+        out = collections.defaultdict(list)
+        out[("all", "all")] = [artifact.path for artifact in self.artifacts]
+
+        # Group by suite first
+        suites = itertools.groupby(
+            sorted(self.artifacts, key=lambda a: a.suite), lambda a: a.suite
+        )
+        for suite, artifacts in suites:
+            artifacts = list(artifacts)
+
+            # List all available platforms
+            platforms = {a.platform for a in artifacts}
+            platforms.add("all")
+
+            # And list all possible permutations with suite + platform
+            out[("all", suite)] += [artifact.path for artifact in artifacts]
+            for platform in platforms:
+                if platform != "all":
+                    out[(platform, "all")] += [
+                        artifact.path
+                        for artifact in artifacts
+                        if artifact.platform == platform
+                    ]
+                out[(platform, suite)] = [
+                    artifact.path
+                    for artifact in artifacts
+                    if platform == "all" or artifact.platform == platform
+                ]
+
+        return out
+
+    def get(self, platform=None, suite=None, chunk=None):
         if suite is not None and chunk is not None:
             raise Exception("suite and chunk can't both have a value")

         # Filter artifacts according to platform, suite and chunk.
         filtered_files = []
-        for fname in files:
-            if platform is not None and not fname.startswith("%s_" % platform):
+        for artifact in self.artifacts:
+            if platform is not None and artifact.platform != platform:
                 continue

-            if suite is not None and suite not in fname:
+            if suite is not None and artifact.suite != suite:
                 continue

-            if chunk is not None and ("%s_code-coverage" % chunk) not in fname:
+            if chunk is not None and artifact.chunk != chunk:
                 continue

-            filtered_files.append(os.path.join(self.parent_dir, fname))
+            filtered_files.append(artifact.path)

         return filtered_files

     def download(self, test_task):
+        suite = taskcluster.get_suite(test_task["task"])
         chunk_name = taskcluster.get_chunk(test_task["task"])
         platform_name = taskcluster.get_platform(test_task["task"])
         test_task_id = test_task["status"]["taskId"]
@@ -75,6 +113,10 @@ def download(self, test_task):
         taskcluster.download_artifact(artifact_path, test_task_id, artifact["name"])
         logger.info("%s artifact downloaded" % artifact_path)

+        self.artifacts.append(
+            Artifact(artifact_path, test_task_id, platform_name, suite, chunk_name)
+        )
+
     def is_filtered_task(self, task):
         """
         Apply name filter from CLI args on task name
diff --git a/bot/code_coverage_bot/codecov.py b/bot/code_coverage_bot/codecov.py
index 28297d5ca..2c1c3c617 100644
--- a/bot/code_coverage_bot/codecov.py
+++ b/bot/code_coverage_bot/codecov.py
@@ -42,6 +42,7 @@ def __init__(self, repository, revision, task_name_filter, cache_root):

         temp_dir = tempfile.mkdtemp()
         self.artifacts_dir = os.path.join(temp_dir, "ccov-artifacts")
+        self.reports_dir = os.path.join(temp_dir, "ccov-reports")

         self.index_service = taskcluster_config.get_service("index")

@@ -118,27 +119,56 @@ def retrieve_source_and_artifacts(self):
             # Thread 2 - Clone repository.
             executor.submit(self.clone_repository, self.repository, self.revision)

-    def generate_covdir(self):
+    def build_reports(self, only=None):
         """
-        Build the covdir report using current artifacts
+        Build all the possible covdir reports using current artifacts
         """
-        output = grcov.report(
-            self.artifactsHandler.get(), source_dir=self.repo_dir, out_format="covdir"
-        )
-        logger.info("Covdir report generated successfully")
-        return json.loads(output)
+        os.makedirs(self.reports_dir, exist_ok=True)

-    # This function is executed when the bot is triggered at the end of a mozilla-central build.
-    def go_from_trigger_mozilla_central(self):
-        # Check the covdir report does not already exists
-        if uploader.gcp_covdir_exists(self.branch, self.revision):
-            logger.warn("Covdir report already on GCP")
-            return
+        reports = {}
+        for (
+            (platform, suite),
+            artifacts,
+        ) in self.artifactsHandler.get_combinations().items():

-        self.retrieve_source_and_artifacts()
+            if only is not None and (platform, suite) not in only:
+                continue
+
+            # Generate covdir report for that suite & platform
+            logger.info(
+                "Building covdir suite report",
+                suite=suite,
+                platform=platform,
+                artifacts=len(artifacts),
+            )
+            output = grcov.report(
+                artifacts, source_dir=self.repo_dir, out_format="covdir"
+            )
+
+            # Write output on FS
+            path = os.path.join(self.reports_dir, f"{platform}.{suite}.json")
+            with open(path, "wb") as f:
+                f.write(output)

-        # Check that all JavaScript files present in the coverage artifacts actually exist.
-        # If they don't, there might be a bug in the LCOV rewriter.
+            reports[(platform, suite)] = path
+
+        return reports
+
+    def upload_reports(self, reports):
+        """
+        Upload all provided covdir reports on GCP
+        """
+        for (platform, suite), path in reports.items():
+            report = open(path, "rb").read()
+            uploader.gcp(
+                self.branch, self.revision, report, suite=suite, platform=platform
+            )
+
+    def check_javascript_files(self):
+        """
+        Check that all JavaScript files present in the coverage artifacts actually exist.
+        If they don't, there might be a bug in the LCOV rewriter.
+        """
         for artifact in self.artifactsHandler.get():
             if "jsvm" not in artifact:
                 continue
@@ -161,7 +191,24 @@ def go_from_trigger_mozilla_central(self):
                 f"{missing_files} are present in coverage reports, but missing from the repository"
             )

-        report = self.generate_covdir()
+    # This function is executed when the bot is triggered at the end of a mozilla-central build.
+    def go_from_trigger_mozilla_central(self):
+        # Check the covdir report does not already exist
+        if uploader.gcp_covdir_exists(self.branch, self.revision, "all", "all"):
+            logger.warn("Full covdir report already on GCP")
+            return
+
+        self.retrieve_source_and_artifacts()
+
+        self.check_javascript_files()
+
+        reports = self.build_reports()
+        logger.info("Built all covdir reports", nb=len(reports))
+
+        # Retrieve the full report
+        full_path = reports.get(("all", "all"))
+        assert full_path is not None, "Missing full report (all:all)"
+        report = json.load(open(full_path))

         paths = uploader.covdir_paths(report)
         expected_extensions = [".js", ".cpp"]
@@ -170,6 +217,9 @@
                 path.endswith(extension) for path in paths
             ), "No {} file in the generated report".format(extension)

+        self.upload_reports(reports)
+        logger.info("Uploaded all covdir reports", nb=len(reports))
+
         # Get pushlog and ask the backend to generate the coverage by changeset
         # data, which will be cached.
         with hgmo.HGMO(self.repo_dir) as hgmo_server:
@@ -179,9 +229,6 @@
         phabricatorUploader = PhabricatorUploader(self.repo_dir, self.revision)
         changesets_coverage = phabricatorUploader.upload(report, changesets)

-        uploader.gcp(self.branch, self.revision, report)
-
-        logger.info("Build uploaded on GCP")
         notify_email(self.revision, changesets, changesets_coverage)

     # This function is executed when the bot is triggered at the end of a try build.
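The hunks above replace the single generate_covdir step with a per-(platform, suite) pipeline: ArtifactsHandler.get_combinations() groups the downloaded artifacts, build_reports() runs grcov once per combination, and upload_reports() pushes each covdir file to GCS. A minimal standalone sketch of the grouping behaviour, using a hand-built artifact list (the paths and task ids below are made up, only the Artifact fields mirror the namedtuple added in artifacts.py):

```python
import collections

# Mirrors the namedtuple introduced in bot/code_coverage_bot/artifacts.py
Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")

artifacts = [
    Artifact("linux_xpcshell-1_grcov.zip", "taskA", "linux", "xpcshell", "xpcshell-1"),
    Artifact("linux_mochitest-2_grcov.zip", "taskB", "linux", "mochitest", "mochitest-2"),
    Artifact("windows_xpcshell-3_grcov.zip", "taskC", "windows", "xpcshell", "xpcshell-3"),
]

# Keys that get_combinations() is expected to emit: the full report, one per
# suite, one per platform, and one per (platform, suite) pair that actually
# has artifacts.
keys = {("all", "all")}
keys |= {("all", a.suite) for a in artifacts}
keys |= {(a.platform, "all") for a in artifacts}
keys |= {(a.platform, a.suite) for a in artifacts}

for platform, suite in sorted(keys):
    paths = [
        a.path
        for a in artifacts
        if platform in ("all", a.platform) and suite in ("all", a.suite)
    ]
    print(f"{platform}:{suite} -> {len(paths)} artifact(s)")
```

Each key maps to the artifact paths that grcov will merge for that report, so the ("all", "all") entry always covers every downloaded artifact.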
@@ -201,7 +248,10 @@ def go_from_trigger_try(self):

         self.retrieve_source_and_artifacts()

-        report = self.generate_covdir()
+        reports = self.build_reports(only=[("all", "all")])
+        full_path = reports.get(("all", "all"))
+        assert full_path is not None, "Missing full report (all:all)"
+        report = json.load(open(full_path))

         logger.info("Upload changeset coverage data to Phabricator")
         phabricatorUploader.upload(report, changesets)
diff --git a/bot/code_coverage_bot/uploader.py b/bot/code_coverage_bot/uploader.py
index fe573453a..4b8c4a26b 100644
--- a/bot/code_coverage_bot/uploader.py
+++ b/bot/code_coverage_bot/uploader.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import itertools
-import json
 import os.path

 import requests
@@ -12,25 +11,29 @@
 from code_coverage_tools.gcp import get_bucket

 logger = structlog.get_logger(__name__)

-GCP_COVDIR_PATH = "{repository}/{revision}.json.zstd"
+GCP_COVDIR_PATH = "{repository}/{revision}/{platform}:{suite}.json.zstd"


-def gcp(repository, revision, report):
+def gcp(repository, revision, report, platform, suite):
     """
     Upload a grcov raw report on Google Cloud Storage
     * Compress with zstandard
     * Upload on bucket using revision in name
     * Trigger ingestion on channel's backend
     """
-    assert isinstance(report, dict)
+    assert isinstance(report, bytes)
+    assert isinstance(platform, str)
+    assert isinstance(suite, str)
     bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

     # Compress report
     compressor = zstd.ZstdCompressor()
-    archive = compressor.compress(json.dumps(report).encode("utf-8"))
+    archive = compressor.compress(report)

     # Upload archive
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
+    path = GCP_COVDIR_PATH.format(
+        repository=repository, revision=revision, platform=platform, suite=suite
+    )
     blob = bucket.blob(path)
     blob.upload_from_string(archive)

@@ -42,22 +45,28 @@ def gcp(repository, revision, report):
     logger.info("Uploaded {} on {}".format(path, bucket))

     # Trigger ingestion on backend
-    retry(lambda: gcp_ingest(repository, revision), retries=10, wait_between_retries=60)
+    retry(
+        lambda: gcp_ingest(repository, revision, platform, suite),
+        retries=10,
+        wait_between_retries=60,
+    )

     return blob


-def gcp_covdir_exists(repository, revision):
+def gcp_covdir_exists(repository, revision, platform, suite):
     """
     Check if a covdir report exists on the Google Cloud Storage bucket
     """
     bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
+    path = GCP_COVDIR_PATH.format(
+        repository=repository, revision=revision, platform=platform, suite=suite
+    )
     blob = bucket.blob(path)
     return blob.exists()


-def gcp_ingest(repository, revision):
+def gcp_ingest(repository, revision, platform, suite):
     """
     The GCP report ingestion is triggered remotely on a backend
     by making a simple HTTP request on the /v2/path endpoint
     will download automatically the new report.
""" params = {"repository": repository, "changeset": revision} + if platform: + params["platform"] = platform + if suite: + params["suite"] = suite backend_host = secrets[secrets.BACKEND_HOST] logger.info( "Ingesting report on backend", host=backend_host, repository=repository, revision=revision, + platform=platform, + suite=suite, ) resp = requests.get("{}/v2/path".format(backend_host), params=params) resp.raise_for_status() diff --git a/bot/tests/test_artifacts.py b/bot/tests/test_artifacts.py index d03aa4a06..40f759fb2 100644 --- a/bot/tests/test_artifacts.py +++ b/bot/tests/test_artifacts.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +import hashlib import itertools import os from unittest import mock @@ -7,6 +7,7 @@ import pytest import responses +from code_coverage_bot.artifacts import Artifact from code_coverage_bot.artifacts import ArtifactsHandler FILES = [ @@ -21,14 +22,28 @@ @pytest.fixture -def FAKE_ARTIFACTS_DIR(tmpdir): - for f in FILES: - open(os.path.join(tmpdir.strpath, f), "w") - return tmpdir.strpath +def fake_artifacts(tmpdir): + def name_to_artifact(name): + """ + Touch the fake artifact & build instance + """ + path = os.path.join(tmpdir.strpath, name) + open(path, "w") + + platform, chunk, _ = name.split("_") + return Artifact( + path, + hashlib.md5(name.encode("utf-8")).hexdigest()[:10], + platform, + chunk[: chunk.rindex("-")] if "-" in chunk else chunk, + chunk, + ) + + return [name_to_artifact(f) for f in FILES] -def test_generate_path(FAKE_ARTIFACTS_DIR): - a = ArtifactsHandler([], parent_dir=FAKE_ARTIFACTS_DIR) +def test_generate_path(fake_artifacts): + a = ArtifactsHandler([]) artifact_jsvm = {"name": "code-coverage-jsvm.info"} artifact_grcov = {"name": "code-coverage-grcov.zip"} assert os.path.join( @@ -39,8 +54,9 @@ def test_generate_path(FAKE_ARTIFACTS_DIR): ) == a.generate_path("windows", "cppunit", artifact_grcov) -def test_get_chunks(FAKE_ARTIFACTS_DIR): - a = ArtifactsHandler([], parent_dir=FAKE_ARTIFACTS_DIR) +def test_get_chunks(fake_artifacts): + a = ArtifactsHandler([]) + a.artifacts = fake_artifacts assert a.get_chunks("windows") == {"mochitest-1", "xpcshell-7", "cppunit"} assert a.get_chunks("linux") == { "mochitest-2", @@ -50,11 +66,82 @@ def test_get_chunks(FAKE_ARTIFACTS_DIR): } -def test_get_coverage_artifacts(FAKE_ARTIFACTS_DIR): +def test_get_combinations(tmpdir, fake_artifacts): def add_dir(files): - return set([os.path.join(FAKE_ARTIFACTS_DIR, f) for f in files]) + return [os.path.join(tmpdir.strpath, f) for f in files] - a = ArtifactsHandler([], parent_dir=FAKE_ARTIFACTS_DIR) + a = ArtifactsHandler([]) + a.artifacts = fake_artifacts + assert dict(a.get_combinations()) == { + ("all", "all"): add_dir( + [ + "windows_mochitest-1_code-coverage-jsvm.info", + "linux_mochitest-2_code-coverage-grcov.zip", + "windows_xpcshell-7_code-coverage-jsvm.info", + "linux_xpcshell-7_code-coverage-grcov.zip", + "linux_xpcshell-3_code-coverage-grcov.zip", + "windows_cppunit_code-coverage-grcov.zip", + "linux_firefox-ui-functional-remote_code-coverage-jsvm.info", + ] + ), + ("linux", "all"): add_dir( + [ + "linux_firefox-ui-functional-remote_code-coverage-jsvm.info", + "linux_mochitest-2_code-coverage-grcov.zip", + "linux_xpcshell-7_code-coverage-grcov.zip", + "linux_xpcshell-3_code-coverage-grcov.zip", + ] + ), + ("windows", "all"): add_dir( + [ + "windows_cppunit_code-coverage-grcov.zip", + "windows_mochitest-1_code-coverage-jsvm.info", + "windows_xpcshell-7_code-coverage-jsvm.info", + ] + ), + ("all", "cppunit"): 
add_dir(["windows_cppunit_code-coverage-grcov.zip"]), + ("windows", "cppunit"): add_dir(["windows_cppunit_code-coverage-grcov.zip"]), + ("all", "firefox-ui-functional"): add_dir( + ["linux_firefox-ui-functional-remote_code-coverage-jsvm.info"] + ), + ("linux", "firefox-ui-functional"): add_dir( + ["linux_firefox-ui-functional-remote_code-coverage-jsvm.info"] + ), + ("all", "mochitest"): add_dir( + [ + "windows_mochitest-1_code-coverage-jsvm.info", + "linux_mochitest-2_code-coverage-grcov.zip", + ] + ), + ("linux", "mochitest"): add_dir(["linux_mochitest-2_code-coverage-grcov.zip"]), + ("windows", "mochitest"): add_dir( + ["windows_mochitest-1_code-coverage-jsvm.info"] + ), + ("all", "xpcshell"): add_dir( + [ + "windows_xpcshell-7_code-coverage-jsvm.info", + "linux_xpcshell-7_code-coverage-grcov.zip", + "linux_xpcshell-3_code-coverage-grcov.zip", + ] + ), + ("linux", "xpcshell"): add_dir( + [ + "linux_xpcshell-7_code-coverage-grcov.zip", + "linux_xpcshell-3_code-coverage-grcov.zip", + ] + ), + ("windows", "xpcshell"): add_dir( + ["windows_xpcshell-7_code-coverage-jsvm.info"] + ), + } + + +def test_get_coverage_artifacts(tmpdir, fake_artifacts): + def add_dir(files): + return set([os.path.join(tmpdir.strpath, f) for f in files]) + + a = ArtifactsHandler([]) + a.artifacts = fake_artifacts assert set(a.get()) == add_dir(FILES) assert set(a.get(suite="mochitest")) == add_dir( [ @@ -174,7 +261,7 @@ def build_task(task_state): @responses.activate def test_download_all( - LINUX_TASK_ID, LINUX_TASK, GROUP_TASKS_1, GROUP_TASKS_2, FAKE_ARTIFACTS_DIR + LINUX_TASK_ID, LINUX_TASK, GROUP_TASKS_1, GROUP_TASKS_2, fake_artifacts ): responses.add( responses.GET, @@ -190,7 +277,7 @@ def test_download_all( status=200, ) - a = ArtifactsHandler({"linux": LINUX_TASK_ID}, parent_dir=FAKE_ARTIFACTS_DIR) + a = ArtifactsHandler({"linux": LINUX_TASK_ID}) downloaded = set() diff --git a/bot/tools/covdir_gen.py b/bot/tools/covdir_gen.py index e2be44679..7a5659756 100644 --- a/bot/tools/covdir_gen.py +++ b/bot/tools/covdir_gen.py @@ -1,85 +1,31 @@ # -*- coding: utf-8 -*- import argparse +import json import os from datetime import datetime -import requests -from libmozdata.vcs_map import download_mapfile -from libmozdata.vcs_map import git_to_mercurial from taskcluster.utils import slugId from code_coverage_bot.secrets import secrets -from code_coverage_tools.taskcluter import TaskclusterConfig +from code_coverage_bot.taskcluster import taskcluster_config -CODECOV_URL = "https://codecov.io/api/gh/marco-c/gecko-dev/commit" MC_REPO = "https://hg.mozilla.org/mozilla-central" -HOOK_GROUP = "project-releng" -HOOK_ID = "services-{app_channel}-codecoverage/bot-generation" - -taskcluster = TaskclusterConfig() -taskcluster.auth( - os.environ["TASKCLUSTER_CLIENT_ID"], os.environ["TASKCLUSTER_ACCESS_TOKEN"] -) -secrets.load(os.environ["TASKCLUSTER_SECRET"]) - - -def list_commits(codecov_token, maximum=None, unique=None, skip_commits=[]): - """ - List all the commits ingested on codecov - """ - assert unique in (None, "week", "day") - params = {"access_token": codecov_token, "page": 1} - nb = 0 - dates = set() - while True: - resp = requests.get(CODECOV_URL, params=params) - resp.raise_for_status() - data = resp.json() - - if not data["commits"]: - return - - for commit in data["commits"]: - - # Skip commit if that day or week has already been processed earlier - day = datetime.strptime(commit["timestamp"], "%Y-%m-%d %H:%M:%S").date() - week = day.isocalendar()[:2] - if unique == "day" and day in dates: - continue - if unique == 
"week" and week in dates: - continue - dates.add(day) - dates.add(week) - - # Convert git to mercurial revision - commit["mercurial"] = git_to_mercurial(commit["commitid"]) - if commit["mercurial"] in skip_commits: - print( - "Skipping already processed commit {}".format(commit["mercurial"]) - ) - continue - - yield commit - nb += 1 - - if maximum is not None and nb >= maximum: - return - - params["page"] += 1 +HOOK_GROUP = "project-relman" +HOOK_ID = "code-coverage-{app_channel}" def trigger_task(task_group_id, commit): """ Trigger a code coverage task to build covdir at a specified revision """ - assert "mercurial" in commit - name = "covdir {} - {} - {}".format( - secrets[secrets.APP_CHANNEL], commit["timestamp"], commit["mercurial"] + date = datetime.fromtimestamp(commit["date"]).strftime("%Y-%m-%d") + name = "covdir with suites on {} - {} - {}".format( + secrets[secrets.APP_CHANNEL], date, commit["changeset"] ) - hooks = taskcluster.get_service("hooks") + hooks = taskcluster_config.get_service("hooks") payload = { "REPOSITORY": MC_REPO, - "REVISION": commit["mercurial"], + "REVISION": commit["changeset"], "taskGroupId": task_group_id, "taskName": name, } @@ -91,11 +37,6 @@ def main(): # CLI args parser = argparse.ArgumentParser() parser.add_argument("--nb-tasks", type=int, default=5, help="NB of tasks to create") - parser.add_argument( - "--unique", - choices=("day", "week"), - help="Trigger only one task per day or week", - ) parser.add_argument( "--group", type=str, default=slugId(), help="Task group to create/update" ) @@ -106,20 +47,17 @@ def main(): help="List actions without triggering any new task", ) parser.add_argument( - "--codecov-token", - type=str, - default=os.environ.get("CODECOV_TOKEN"), - help="Codecov access token", + "history", type=open, help="JSON payload of /v2/history endpoint" ) args = parser.parse_args() - # Download revision mapper database - print("Downloading revision database...") - download_mapfile() + # Setup Taskcluster + taskcluster_config.auth() + secrets.load(os.environ["TASKCLUSTER_SECRET"]) # List existing tags & commits print("Group", args.group) - queue = taskcluster.get_service("queue") + queue = taskcluster_config.get_service("queue") try: group = queue.listTaskGroup(args.group) commits = [ @@ -136,14 +74,24 @@ def main(): print("Invalid task group : {}".format(e)) commits = [] + # Read the history file + history = json.load(args.history) + # Trigger a task for each commit - for commit in list_commits(args.codecov_token, args.nb_tasks, args.unique, commits): - print("Triggering commit {mercurial} from {timestamp}".format(**commit)) + nb = 0 + for commit in history: + if nb >= args.nb_tasks: + break + if commit in commits: + print("Skipping {commit {changeset} from {date}".format(**commit)) + continue + print("Triggering commit {changeset} from {date}".format(**commit)) if args.dry_run: print(">>> No trigger on dry run") else: out = trigger_task(args.group, commit) print(">>>", out["status"]["taskId"]) + nb += 1 if __name__ == "__main__":