bot: build detailed reports for test suites & platforms #144

Merged: 8 commits, Sep 4, 2019
Changes from all commits
62 changes: 52 additions & 10 deletions bot/code_coverage_bot/artifacts.py
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
import collections
import fnmatch
import itertools
import os
import time

@@ -11,6 +13,9 @@
logger = structlog.get_logger(__name__)


Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")


SUITES_TO_IGNORE = [
"awsy",
"talos",
@@ -25,41 +30,74 @@ def __init__(self, task_ids, parent_dir="ccov-artifacts", task_name_filter="*"):
self.task_ids = task_ids
self.parent_dir = parent_dir
self.task_name_filter = task_name_filter
self.artifacts = []

def generate_path(self, platform, chunk, artifact):
file_name = "%s_%s_%s" % (platform, chunk, os.path.basename(artifact["name"]))
return os.path.join(self.parent_dir, file_name)

def get_chunks(self, platform):
return set(
-            f.split("_")[1]
-            for f in os.listdir(self.parent_dir)
-            if os.path.basename(f).startswith(f"{platform}_")
artifact.chunk
for artifact in self.artifacts
if artifact.platform == platform
)

-    def get(self, platform=None, suite=None, chunk=None):
-        files = os.listdir(self.parent_dir)
def get_combinations(self):
# Add the full report
out = collections.defaultdict(list)
out[("all", "all")] = [artifact.path for artifact in self.artifacts]

# Group by suite first
suites = itertools.groupby(
sorted(self.artifacts, key=lambda a: a.suite), lambda a: a.suite
)
for suite, artifacts in suites:
artifacts = list(artifacts)

# List all available platforms
platforms = {a.platform for a in artifacts}
platforms.add("all")

# And list all possible permutations with suite + platform
out[("all", suite)] += [artifact.path for artifact in artifacts]
for platform in platforms:
if platform != "all":
out[(platform, "all")] += [
artifact.path
for artifact in artifacts
if artifact.platform == platform
]
out[(platform, suite)] = [
artifact.path
for artifact in artifacts
if platform == "all" or artifact.platform == platform
]

return out

def get(self, platform=None, suite=None, chunk=None):
if suite is not None and chunk is not None:
raise Exception("suite and chunk can't both have a value")

# Filter artifacts according to platform, suite and chunk.
filtered_files = []
-        for fname in files:
-            if platform is not None and not fname.startswith("%s_" % platform):
for artifact in self.artifacts:
if platform is not None and artifact.platform != platform:
continue

-            if suite is not None and suite not in fname:
if suite is not None and artifact.suite != suite:
continue

if chunk is not None and ("%s_code-coverage" % chunk) not in fname:
if chunk is not None and artifact.chunk != chunk:
continue

-            filtered_files.append(os.path.join(self.parent_dir, fname))
filtered_files.append(artifact.path)

return filtered_files

def download(self, test_task):
suite = taskcluster.get_suite(test_task["task"])
chunk_name = taskcluster.get_chunk(test_task["task"])
platform_name = taskcluster.get_platform(test_task["task"])
test_task_id = test_task["status"]["taskId"]
@@ -75,6 +113,10 @@ def download(self, test_task):
taskcluster.download_artifact(artifact_path, test_task_id, artifact["name"])
logger.info("%s artifact downloaded" % artifact_path)

self.artifacts.append(
Artifact(artifact_path, test_task_id, platform_name, suite, chunk_name)
)

def is_filtered_task(self, task):
"""
Apply name filter from CLI args on task name
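
To make the new grouping concrete, here is a self-contained sketch of the logic behind get_combinations, run on a few hypothetical artifacts. The paths, task ids, suites, and chunks below are invented for illustration; in the bot they come from download and generate_path.

import collections
import itertools

Artifact = collections.namedtuple("Artifact", "path, task_id, platform, suite, chunk")

# Hypothetical downloaded artifacts (names and task ids are made up).
artifacts = [
    Artifact("ccov-artifacts/linux_mochitest-1_grcov.zip", "task1", "linux", "mochitest", "mochitest-1"),
    Artifact("ccov-artifacts/linux_xpcshell-1_grcov.zip", "task2", "linux", "xpcshell", "xpcshell-1"),
    Artifact("ccov-artifacts/windows_mochitest-1_grcov.zip", "task3", "windows", "mochitest", "mochitest-1"),
]

# Same grouping logic as ArtifactsHandler.get_combinations above.
out = collections.defaultdict(list)
out[("all", "all")] = [a.path for a in artifacts]

suites = itertools.groupby(sorted(artifacts, key=lambda a: a.suite), lambda a: a.suite)
for suite, group in suites:
    group = list(group)
    platforms = {a.platform for a in group}
    platforms.add("all")
    out[("all", suite)] += [a.path for a in group]
    for platform in platforms:
        if platform != "all":
            out[(platform, "all")] += [a.path for a in group if a.platform == platform]
        out[(platform, suite)] = [
            a.path for a in group if platform == "all" or a.platform == platform
        ]

for (platform, suite), paths in sorted(out.items()):
    print(platform, suite, len(paths))
# all all 3            (the full report)
# all mochitest 2      (one suite across every platform)
# all xpcshell 1
# linux all 2          (one platform across every suite)
# linux mochitest 1
# linux xpcshell 1
# windows all 1
# windows mochitest 1

Each key of the resulting dict is a (platform, suite) pair that later becomes one covdir report.
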
92 changes: 71 additions & 21 deletions bot/code_coverage_bot/codecov.py
@@ -42,6 +42,7 @@ def __init__(self, repository, revision, task_name_filter, cache_root):

temp_dir = tempfile.mkdtemp()
self.artifacts_dir = os.path.join(temp_dir, "ccov-artifacts")
self.reports_dir = os.path.join(temp_dir, "ccov-reports")

self.index_service = taskcluster_config.get_service("index")

@@ -118,27 +119,56 @@ def retrieve_source_and_artifacts(self):
# Thread 2 - Clone repository.
executor.submit(self.clone_repository, self.repository, self.revision)

-    def generate_covdir(self):
def build_reports(self, only=None):
"""
-        Build the covdir report using current artifacts
Build all the possible covdir reports using current artifacts
"""
-        output = grcov.report(
-            self.artifactsHandler.get(), source_dir=self.repo_dir, out_format="covdir"
-        )
-        logger.info("Covdir report generated successfully")
-        return json.loads(output)
os.makedirs(self.reports_dir, exist_ok=True)

-    # This function is executed when the bot is triggered at the end of a mozilla-central build.
-    def go_from_trigger_mozilla_central(self):
-        # Check the covdir report does not already exists
-        if uploader.gcp_covdir_exists(self.branch, self.revision):
-            logger.warn("Covdir report already on GCP")
-            return
reports = {}
for (
(platform, suite),
artifacts,
) in self.artifactsHandler.get_combinations().items():

-        self.retrieve_source_and_artifacts()
if only is not None and (platform, suite) not in only:
continue

# Generate covdir report for that suite & platform
logger.info(
"Building covdir suite report",
suite=suite,
platform=platform,
artifacts=len(artifacts),
)
output = grcov.report(
artifacts, source_dir=self.repo_dir, out_format="covdir"
)

# Write output on FS
path = os.path.join(self.reports_dir, f"{platform}.{suite}.json")
with open(path, "wb") as f:
f.write(output)

-        # Check that all JavaScript files present in the coverage artifacts actually exist.
-        # If they don't, there might be a bug in the LCOV rewriter.
reports[(platform, suite)] = path

return reports

def upload_reports(self, reports):
"""
Upload all provided covdir reports on GCP
"""
for (platform, suite), path in reports.items():
report = open(path, "rb").read()
uploader.gcp(
self.branch, self.revision, report, suite=suite, platform=platform
)

def check_javascript_files(self):
"""
Check that all JavaScript files present in the coverage artifacts actually exist.
If they don't, there might be a bug in the LCOV rewriter.
"""
for artifact in self.artifactsHandler.get():
if "jsvm" not in artifact:
continue
@@ -161,7 +191,24 @@ def go_from_trigger_mozilla_central(self):
f"{missing_files} are present in coverage reports, but missing from the repository"
)

-        report = self.generate_covdir()
# This function is executed when the bot is triggered at the end of a mozilla-central build.
def go_from_trigger_mozilla_central(self):
        # Check the covdir report does not already exist
if uploader.gcp_covdir_exists(self.branch, self.revision, "all", "all"):
logger.warn("Full covdir report already on GCP")
return

self.retrieve_source_and_artifacts()

self.check_javascript_files()

reports = self.build_reports()
logger.info("Built all covdir reports", nb=len(reports))

# Retrieve the full report
full_path = reports.get(("all", "all"))
assert full_path is not None, "Missing full report (all:all)"
report = json.load(open(full_path))

paths = uploader.covdir_paths(report)
expected_extensions = [".js", ".cpp"]
@@ -170,6 +217,9 @@ def go_from_trigger_mozilla_central(self):
path.endswith(extension) for path in paths
), "No {} file in the generated report".format(extension)

self.upload_reports(reports)
logger.info("Uploaded all covdir reports", nb=len(reports))

# Get pushlog and ask the backend to generate the coverage by changeset
# data, which will be cached.
with hgmo.HGMO(self.repo_dir) as hgmo_server:
@@ -179,9 +229,6 @@ def go_from_trigger_try(self):
phabricatorUploader = PhabricatorUploader(self.repo_dir, self.revision)
changesets_coverage = phabricatorUploader.upload(report, changesets)

-        uploader.gcp(self.branch, self.revision, report)
-
-        logger.info("Build uploaded on GCP")
notify_email(self.revision, changesets, changesets_coverage)

# This function is executed when the bot is triggered at the end of a try build.
@@ -201,7 +248,10 @@ def go_from_trigger_try(self):

self.retrieve_source_and_artifacts()

-        report = self.generate_covdir()
reports = self.build_reports(only=[("all", "all")])
full_path = reports.get(("all", "all"))
assert full_path is not None, "Missing full report (all:all)"
report = json.load(open(full_path))

logger.info("Upload changeset coverage data to Phabricator")
phabricatorUploader.upload(report, changesets)
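
The report naming scheme and the only filter deserve a quick illustration. Below is a minimal runnable sketch of the same pattern as build_reports above, with stand-in covdir payloads instead of real grcov.report output and a temporary directory instead of self.reports_dir.

import json
import os
import tempfile

# Stand-ins for grcov.report output, keyed like get_combinations results.
outputs = {
    ("all", "all"): b'{"name": "", "coveragePercent": 83.1, "children": {}}',
    ("linux", "xpcshell"): b'{"name": "", "coveragePercent": 79.4, "children": {}}',
}

def build_reports(reports_dir, only=None):
    # One {platform}.{suite}.json file per requested combination.
    os.makedirs(reports_dir, exist_ok=True)
    reports = {}
    for (platform, suite), output in outputs.items():
        if only is not None and (platform, suite) not in only:
            continue
        path = os.path.join(reports_dir, f"{platform}.{suite}.json")
        with open(path, "wb") as f:
            f.write(output)
        reports[(platform, suite)] = path
    return reports

reports_dir = tempfile.mkdtemp()

# try builds only need the full report for the Phabricator upload:
reports = build_reports(reports_dir, only=[("all", "all")])
assert set(reports) == {("all", "all")}

with open(reports[("all", "all")]) as f:
    report = json.load(f)
print(report["coveragePercent"])  # 83.1

Writing each report to disk and passing paths around keeps memory bounded even when dozens of (platform, suite) combinations are built in one run.
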
35 changes: 25 additions & 10 deletions bot/code_coverage_bot/uploader.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import itertools
-import json
import os.path

import requests
@@ -12,25 +11,29 @@
from code_coverage_tools.gcp import get_bucket

logger = structlog.get_logger(__name__)
GCP_COVDIR_PATH = "{repository}/{revision}.json.zstd"
GCP_COVDIR_PATH = "{repository}/{revision}/{platform}:{suite}.json.zstd"


-def gcp(repository, revision, report):
def gcp(repository, revision, report, platform, suite):
"""
Upload a grcov raw report on Google Cloud Storage
* Compress with zstandard
* Upload on bucket using revision in name
* Trigger ingestion on channel's backend
"""
-    assert isinstance(report, dict)
assert isinstance(report, bytes)
assert isinstance(platform, str)
assert isinstance(suite, str)
bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])

# Compress report
compressor = zstd.ZstdCompressor()
-    archive = compressor.compress(json.dumps(report).encode("utf-8"))
archive = compressor.compress(report)

# Upload archive
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
path = GCP_COVDIR_PATH.format(
repository=repository, revision=revision, platform=platform, suite=suite
)
blob = bucket.blob(path)
blob.upload_from_string(archive)

@@ -42,35 +45,47 @@ def gcp(repository, revision, report):
logger.info("Uploaded {} on {}".format(path, bucket))

# Trigger ingestion on backend
-    retry(lambda: gcp_ingest(repository, revision), retries=10, wait_between_retries=60)
retry(
lambda: gcp_ingest(repository, revision, platform, suite),
retries=10,
wait_between_retries=60,
)

return blob


-def gcp_covdir_exists(repository, revision):
def gcp_covdir_exists(repository, revision, platform, suite):
"""
Check if a covdir report exists on the Google Cloud Storage bucket
"""
bucket = get_bucket(secrets[secrets.GOOGLE_CLOUD_STORAGE])
-    path = GCP_COVDIR_PATH.format(repository=repository, revision=revision)
path = GCP_COVDIR_PATH.format(
repository=repository, revision=revision, platform=platform, suite=suite
)
blob = bucket.blob(path)
return blob.exists()


-def gcp_ingest(repository, revision):
def gcp_ingest(repository, revision, platform, suite):
"""
    The GCP report ingestion is triggered remotely on a backend
    by making a simple HTTP request on the /v2/path endpoint.
    By specifying the exact new revision processed, the backend
    will automatically download the new report.
"""
params = {"repository": repository, "changeset": revision}
if platform:
params["platform"] = platform
if suite:
params["suite"] = suite
backend_host = secrets[secrets.BACKEND_HOST]
logger.info(
"Ingesting report on backend",
host=backend_host,
repository=repository,
revision=revision,
platform=platform,
suite=suite,
)
resp = requests.get("{}/v2/path".format(backend_host), params=params)
resp.raise_for_status()
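
To close, a small sketch of the new bucket layout and ingestion parameters. The repository and revision values are invented, and the import alias for zstandard is assumed to match the bot's usage of zstd.ZstdCompressor above.

import zstandard as zstd

GCP_COVDIR_PATH = "{repository}/{revision}/{platform}:{suite}.json.zstd"

# Where a linux/xpcshell report for a made-up revision would land in the bucket:
path = GCP_COVDIR_PATH.format(
    repository="mozilla-central",
    revision="abcdef123456",
    platform="linux",
    suite="xpcshell",
)
print(path)  # mozilla-central/abcdef123456/linux:xpcshell.json.zstd

# gcp() now receives the raw covdir bytes and compresses them as-is,
# instead of json.dumps-ing a dict first:
report = b'{"name": "", "coveragePercent": 83.1, "children": {}}'
archive = zstd.ZstdCompressor().compress(report)

# gcp_ingest() then asks the backend to pull that exact report:
params = {"repository": "mozilla-central", "changeset": "abcdef123456"}
platform, suite = "linux", "xpcshell"
if platform:
    params["platform"] = platform
if suite:
    params["suite"] = suite
# GET {backend_host}/v2/path?repository=...&changeset=...&platform=linux&suite=xpcshell

Keeping platform and suite in both the object name and the ingestion query lets the backend address any single report without listing the bucket.
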