Skip to content

Generate a "pure" per-chunk mapping file #105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 131 additions & 82 deletions bot/code_coverage_bot/chunk_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,90 +173,139 @@ def is_chunk_only_suite(suite):
return True


def _inner_generate(
    repo_dir, revision, artifactsHandler, per_test_cursor, per_chunk_cursor, executor
):
    """Populate the per-test and per-chunk chunk-mapping databases.

    Creates the schema on both cursors, then fills:
      - ``file_to_test`` (per-test DB only) from test coverage data,
      - ``chunk_to_test`` and ``file_to_chunk`` in the per-chunk DB always,
        and in the per-test DB only for suites that are chunk-only
        (per ``is_chunk_only_suite``).

    Args:
        repo_dir: path to the source checkout, used by grcov to resolve
            source file paths in coverage artifacts.
        revision: revision whose suites/chunks are queried.
        artifactsHandler: provides coverage artifacts per platform/chunk.
        per_test_cursor: sqlite3 cursor for the per-test mapping DB.
        per_chunk_cursor: sqlite3 cursor for the per-chunk mapping DB.
        executor: executor used to parse grcov artifacts concurrently.
    """
    per_test_cursor.execute(
        "CREATE TABLE file_to_chunk (path text, platform text, chunk text)"
    )
    per_test_cursor.execute(
        "CREATE TABLE chunk_to_test (platform text, chunk text, path text)"
    )
    per_test_cursor.execute("CREATE TABLE file_to_test (source text, test text)")

    per_chunk_cursor.execute(
        "CREATE TABLE file_to_chunk (path text, platform text, chunk text)"
    )
    per_chunk_cursor.execute(
        "CREATE TABLE chunk_to_test (platform text, chunk text, path text)"
    )

    logger.info("Populating file_to_test table.")
    test_coverage_suites = get_test_coverage_suites()
    logger.info("Found {} test suites.".format(len(test_coverage_suites)))
    # Query in batches to stay under service limits (see group_by_20k).
    for suites in group_by_20k(test_coverage_suites):
        test_coverage_tests = get_test_coverage_tests(suites)
        for tests in group_by_20k(test_coverage_tests):
            tests_files_data = get_test_coverage_files(tests)

            source_names = tests_files_data["source.file.name"]
            test_iter = enumerate(tests_files_data["test.name"])
            source_test_iter = ((source_names[i], test) for i, test in test_iter)

            per_test_cursor.executemany(
                "INSERT INTO file_to_test VALUES (?,?)", source_test_iter
            )

    futures = {}
    for platform in PLATFORMS:
        logger.info("Reading chunk coverage artifacts for {}.".format(platform))
        for chunk in artifactsHandler.get_chunks(platform):
            assert chunk.strip() != "", "chunk can not be an empty string"

            artifacts = artifactsHandler.get(platform=platform, chunk=chunk)

            assert len(artifacts) > 0, "There should be at least one artifact"

            # Parsing grcov output is slow; fan it out to the executor and
            # collect results below once all submissions are queued.
            future = executor.submit(grcov.files_list, artifacts, source_dir=repo_dir)
            futures[future] = (platform, chunk)

        logger.info("Populating chunk_to_test table for {}.".format(platform))
        for suite in get_suites(revision):
            tests_data = get_tests_chunks(revision, platform, suite)
            if len(tests_data) == 0:
                # logger.warn is a deprecated alias of logger.warning.
                logger.warning(
                    "No tests found for platform {} and suite {}.".format(
                        platform, suite
                    )
                )
                continue

            logger.info(
                "Adding tests for platform {} and suite {}".format(platform, suite)
            )
            task_names = tests_data["run.key"]

            # A function (not a bare generator) because generators are
            # single-use and we may need to insert into both databases.
            def chunk_test_iter():
                test_iter = enumerate(tests_data["result.test"])
                return (
                    (platform, taskcluster.get_chunk(task_names[i]), test)
                    for i, test in test_iter
                )

            # The per-test DB only keeps chunk-level rows for suites that
            # cannot be mapped per-test; the per-chunk DB keeps everything.
            if is_chunk_only_suite(suite):
                per_test_cursor.executemany(
                    "INSERT INTO chunk_to_test VALUES (?,?,?)", chunk_test_iter()
                )

            per_chunk_cursor.executemany(
                "INSERT INTO chunk_to_test VALUES (?,?,?)", chunk_test_iter()
            )

    logger.info("Populating file_to_chunk table.")
    for future in concurrent.futures.as_completed(futures):
        (platform, chunk) = futures[future]
        files = future.result()

        suite = taskcluster.get_suite(chunk)
        if is_chunk_only_suite(suite):
            per_test_cursor.executemany(
                "INSERT INTO file_to_chunk VALUES (?,?,?)",
                ((f, platform, chunk) for f in files),
            )

        per_chunk_cursor.executemany(
            "INSERT INTO file_to_chunk VALUES (?,?,?)",
            ((f, platform, chunk) for f in files),
        )

def generate(repo_dir, revision, artifactsHandler, out_dir="."):
logger.info("Generating chunk mapping...")
sqlite_file = os.path.join(out_dir, "chunk_mapping.sqlite")
tarxz_file = os.path.join(out_dir, "chunk_mapping.tar.xz")

with sqlite3.connect(sqlite_file) as conn:
logger.info("Creating tables.")
c = conn.cursor()
c.execute("CREATE TABLE file_to_chunk (path text, platform text, chunk text)")
c.execute("CREATE TABLE chunk_to_test (platform text, chunk text, path text)")
c.execute("CREATE TABLE file_to_test (source text, test text)")

logger.info("Populating file_to_test table.")
test_coverage_suites = get_test_coverage_suites()
logger.info("Found {} test suites.".format(len(test_coverage_suites)))
for suites in group_by_20k(test_coverage_suites):
test_coverage_tests = get_test_coverage_tests(suites)
for tests in group_by_20k(test_coverage_tests):
tests_files_data = get_test_coverage_files(tests)

source_names = tests_files_data["source.file.name"]
test_iter = enumerate(tests_files_data["test.name"])
source_test_iter = ((source_names[i], test) for i, test in test_iter)

c.executemany("INSERT INTO file_to_test VALUES (?,?)", source_test_iter)

with ThreadPoolExecutor(max_workers=4) as executor:
futures = {}
for platform in PLATFORMS:
logger.info("Reading chunk coverage artifacts for {}.".format(platform))
for chunk in artifactsHandler.get_chunks(platform):
suite = taskcluster.get_suite(chunk)
if not is_chunk_only_suite(suite):
continue

assert chunk.strip() != "", "chunk can not be an empty string"

artifacts = artifactsHandler.get(platform=platform, chunk=chunk)

assert len(artifacts) > 0, "There should be at least one artifact"

future = executor.submit(
grcov.files_list, artifacts, source_dir=repo_dir
)
futures[future] = (platform, chunk)

logger.info("Populating chunk_to_test table for {}.".format(platform))
for suite in get_suites(revision):
if not is_chunk_only_suite(suite):
continue

tests_data = get_tests_chunks(revision, platform, suite)
if len(tests_data) == 0:
logger.warn(
"No tests found for platform {} and suite {}.".format(
platform, suite
)
)
continue

logger.info(
"Adding tests for platform {} and suite {}".format(
platform, suite
)
)
task_names = tests_data["run.key"]
test_iter = enumerate(tests_data["result.test"])
chunk_test_iter = (
(platform, taskcluster.get_chunk(task_names[i]), test)
for i, test in test_iter
)
c.executemany(
"INSERT INTO chunk_to_test VALUES (?,?,?)", chunk_test_iter
)

logger.info("Populating file_to_chunk table.")
for future in concurrent.futures.as_completed(futures):
(platform, chunk) = futures[future]
files = future.result()
c.executemany(
"INSERT INTO file_to_chunk VALUES (?,?,?)",
((f, platform, chunk) for f in files),
# TODO: Change chunk_mapping to test_mapping, but the name should be synced in mozilla-central
# in the coverage selector!
per_test_sqlite_file = os.path.join(out_dir, "chunk_mapping.sqlite")
per_test_tarxz_file = os.path.join(out_dir, "chunk_mapping.tar.xz")

per_chunk_sqlite_file = os.path.join(out_dir, "per_chunk_mapping.sqlite")
per_chunk_tarxz_file = os.path.join(out_dir, "per_chunk_mapping.tar.xz")

logger.info("Creating tables.")
with sqlite3.connect(per_test_sqlite_file) as per_test_conn:
per_test_cursor = per_test_conn.cursor()

with sqlite3.connect(per_chunk_sqlite_file) as per_chunk_conn:
per_chunk_cursor = per_chunk_conn.cursor()

with ThreadPoolExecutor(max_workers=4) as executor:
_inner_generate(
repo_dir,
revision,
artifactsHandler,
per_test_cursor,
per_chunk_cursor,
executor,
)

logger.info("Writing the chunk mapping archive at {}.".format(tarxz_file))
with tarfile.open(tarxz_file, "w:xz") as tar:
tar.add(sqlite_file, os.path.basename(sqlite_file))
logger.info(
"Writing the per-test mapping archive at {}.".format(per_test_tarxz_file)
)
with tarfile.open(per_test_tarxz_file, "w:xz") as tar:
tar.add(per_test_sqlite_file, os.path.basename(per_test_sqlite_file))

logger.info(
"Writing the per-chunk mapping archive at {}.".format(per_chunk_tarxz_file)
)
with tarfile.open(per_chunk_tarxz_file, "w:xz") as tar:
tar.add(per_chunk_sqlite_file, os.path.basename(per_chunk_sqlite_file))
40 changes: 30 additions & 10 deletions bot/tests/test_chunk_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self):
pass

def get_chunks(self, platform):
return {"chunk1", "chunk2"}
return {"chunk1", "chunk2", "mochitest"}

def get(self, platform=None, suite=None, chunk=None):
if platform == "linux" and chunk == "chunk1":
Expand All @@ -33,6 +33,8 @@ def get(self, platform=None, suite=None, chunk=None):
return [grcov_existing_file_artifact] # code_coverage_bot/cli.py
elif platform == "windows" and chunk == "chunk2":
return [grcov_uncovered_function_artifact] # js/src/jit/JIT.cpp
elif platform in ["linux", "windows"] and chunk == "mochitest":
return [grcov_artifact] # js/src/jit/BitSet.cpp

return FakeArtifactsHandler()

Expand All @@ -44,12 +46,10 @@ def assert_file_to_test(c, source_path, test_path):
assert results[0][0] == test_path


def assert_file_to_chunk(c, path, platform, chunk):
def assert_file_to_chunk(c, path, expected_results):
c.execute("SELECT platform, chunk FROM file_to_chunk WHERE path=?", (path,))
results = c.fetchall()
assert len(results) == 1
assert results[0][0] == platform
assert results[0][1] == chunk
assert set(results) == set(expected_results)


def assert_chunk_to_test(c, platform, chunk, tests):
Expand Down Expand Up @@ -112,7 +112,7 @@ def request_callback(request):
requested_suite = payload["where"]["and"][2]["eq"][
"run.suite.fullname"
]
if requested_suite == "gtest":
if requested_suite in ["gtest", "talos"]:
data = {}
elif requested_suite == "marionette":
prefix = payload["where"]["and"][3]["prefix"]["run.key"]
Expand Down Expand Up @@ -172,12 +172,32 @@ def request_callback(request):
"netwerk/test/unit/test_substituting_protocol_handler.js",
)

assert_file_to_chunk(c, "js/src/jit/BitSet.cpp", "linux", "chunk1")
assert_file_to_chunk(c, "js/src/jit/BitSet.cpp", [("linux", "chunk1")])
assert_file_to_chunk(
c, "toolkit/components/osfile/osfile.jsm", [("linux", "chunk2")]
)
assert_file_to_chunk(c, "code_coverage_bot/cli.py", [("windows", "chunk1")])
assert_file_to_chunk(c, "js/src/jit/JIT.cpp", [("windows", "chunk2")])

assert_chunk_to_test(c, "linux", "marionette-headless", ["marionette-test1"])
assert_chunk_to_test(c, "windows", "marionette", ["marionette-test2"])

with tarfile.open(os.path.join(tmp_path, "per_chunk_mapping.tar.xz")) as t:
t.extract("per_chunk_mapping.sqlite", tmp_path)

with sqlite3.connect(os.path.join(tmp_path, "per_chunk_mapping.sqlite")) as conn:
c = conn.cursor()

assert_file_to_chunk(
c,
"js/src/jit/BitSet.cpp",
[("linux", "chunk1"), ("linux", "mochitest"), ("windows", "mochitest")],
)
assert_file_to_chunk(
c, "toolkit/components/osfile/osfile.jsm", "linux", "chunk2"
c, "toolkit/components/osfile/osfile.jsm", [("linux", "chunk2")]
)
assert_file_to_chunk(c, "code_coverage_bot/cli.py", "windows", "chunk1")
assert_file_to_chunk(c, "js/src/jit/JIT.cpp", "windows", "chunk2")
assert_file_to_chunk(c, "code_coverage_bot/cli.py", [("windows", "chunk1")])
assert_file_to_chunk(c, "js/src/jit/JIT.cpp", [("windows", "chunk2")])

assert_chunk_to_test(c, "linux", "marionette-headless", ["marionette-test1"])
assert_chunk_to_test(c, "windows", "marionette", ["marionette-test2"])