
Commit 7dec4ae

Remove batching on unittest thread, use historical data to inform batching (#18578)

* Remove batching on unittest thread
* Batch more things, improve output, use past test perf as a better heuristic for future test runs
* Fix merge side effect
* Fix typo

1 parent b9b1127 commit 7dec4ae
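
The heart of the change: instead of packing test batches by file size alone, the host reads per-test durations recorded on previous runs and uses them as the packing cost. Below is a minimal, self-contained sketch of that idea; the names (Task, packBatches) are illustrative, and it uses a simple put-in-emptiest-batch strategy rather than the commit's round-robin batcher loop.

    interface Task { file: string; cost: number; }

    // Pack tasks into batchCount groups, holding back a fraction of the total
    // cost as a shared pool for work stealing once a worker finishes its batch.
    function packBatches(tasks: Task[], batchCount: number, packFraction = 0.9) {
        const totalCost = tasks.reduce((sum, t) => sum + t.cost, 0);
        const target = (totalCost / batchCount) * packFraction;
        const sorted = [...tasks].sort((a, b) => b.cost - a.cost); // biggest first
        const batches: Task[][] = Array.from({ length: batchCount }, () => []);
        const costs = new Array<number>(batchCount).fill(0);
        const reserve: Task[] = [];
        for (const task of sorted) {
            let best = 0; // emptiest batch so far
            for (let i = 1; i < batchCount; i++) {
                if (costs[i] < costs[best]) best = i;
            }
            if (costs[best] >= target) {
                reserve.push(task); // held back for stealing
                continue;
            }
            batches[best].push(task);
            costs[best] += task.cost;
        }
        return { batches, reserve };
    }
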

File tree

4 files changed: +132 −41 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -59,3 +59,4 @@ internal/
 .idea
 yarn.lock
 package-lock.json
+.parallelperf.json

src/harness/parallel/host.ts

Lines changed: 120 additions & 37 deletions
@@ -5,11 +5,10 @@ if (typeof describe === "undefined") {
 namespace Harness.Parallel.Host {

     interface ChildProcessPartial {
-        send(message: any, callback?: (error: Error) => void): boolean;
+        send(message: ParallelHostMessage, callback?: (error: Error) => void): boolean;
         on(event: "error", listener: (err: Error) => void): this;
         on(event: "exit", listener: (code: number, signal: string) => void): this;
-        on(event: "message", listener: (message: any) => void): this;
-        disconnect(): void;
+        on(event: "message", listener: (message: ParallelClientMessage) => void): this;
     }

     interface ProgressBarsOptions {
@@ -27,23 +26,54 @@ namespace Harness.Parallel.Host {
         text?: string;
     }

+    const perfdataFileName = ".parallelperf.json";
+    function readSavedPerfData(): {[testHash: string]: number} {
+        const perfDataContents = Harness.IO.readFile(perfdataFileName);
+        if (perfDataContents) {
+            return JSON.parse(perfDataContents);
+        }
+        return undefined;
+    }
+
+    function hashName(runner: TestRunnerKind, test: string) {
+        return `tsrunner-${runner}://${test}`;
+    }
+
     export function start() {
         initializeProgressBarsDependencies();
         console.log("Discovering tests...");
         const discoverStart = +(new Date());
         const { statSync }: { statSync(path: string): { size: number }; } = require("fs");
         const tasks: { runner: TestRunnerKind, file: string, size: number }[] = [];
-        let totalSize = 0;
+        const perfData = readSavedPerfData();
+        let totalCost = 0;
+        let unknownValue: string | undefined;
         for (const runner of runners) {
             const files = runner.enumerateTestFiles();
             for (const file of files) {
-                const size = statSync(file).size;
+                let size: number;
+                if (!perfData) {
+                    size = statSync(file).size;
+
+                }
+                else {
+                    const hashedName = hashName(runner.kind(), file);
+                    size = perfData[hashedName];
+                    if (size === undefined) {
+                        size = Number.MAX_SAFE_INTEGER;
+                        unknownValue = hashedName;
+                    }
+                }
                 tasks.push({ runner: runner.kind(), file, size });
-                totalSize += size;
+                totalCost += size;
             }
         }
         tasks.sort((a, b) => a.size - b.size);
-        const batchSize = (totalSize / workerCount) * 0.9;
+        // 1 fewer batches than threads to account for unittests running on the final thread
+        const batchCount = runners.length === 1 ? workerCount : workerCount - 1;
+        const packfraction = 0.9;
+        const chunkSize = 1000; // ~1KB or 1s for sending batches near the end of a test
+        const batchSize = (totalCost / workerCount) * packfraction; // Keep spare tests for unittest thread in reserve
         console.log(`Discovered ${tasks.length} test files in ${+(new Date()) - discoverStart}ms.`);
         console.log(`Starting to run tests using ${workerCount} threads...`);
         const { fork }: { fork(modulePath: string, args?: string[], options?: {}): ChildProcessPartial; } = require("child_process");
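
For reference, .parallelperf.json is a flat map from a hashed test identity to its last observed duration in milliseconds; the keys come from hashName above. A sketch of its shape and of the cost lookup, with invented example entries:

    // Invented example contents of .parallelperf.json after one profiled run:
    const examplePerfData: { [testHash: string]: number } = {
        "tsrunner-compiler://tests/cases/compiler/bigProject.ts": 2310,
        "tsrunner-conformance://tests/cases/conformance/types/any.ts": 84,
    };

    // Cost lookup mirroring the loop above: with no perf file the cost is the
    // file's size in bytes; a profiled test costs its duration in ms; an
    // unprofiled test gets MAX_SAFE_INTEGER so the ascending sort puts it at
    // the end of the task array, where pop() schedules it first.
    function costOf(perfData: { [hash: string]: number } | undefined, hash: string, fileSize: number): number {
        if (!perfData) return fileSize;
        return perfData[hash] !== undefined ? perfData[hash] : Number.MAX_SAFE_INTEGER;
    }
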
@@ -59,15 +89,17 @@
         const progressUpdateInterval = 1 / progressBars._options.width;
         let nextProgress = progressUpdateInterval;

+        const newPerfData: {[testHash: string]: number} = {};
+
         const workers: ChildProcessPartial[] = [];
+        let closedWorkers = 0;
         for (let i = 0; i < workerCount; i++) {
             // TODO: Just send the config over the IPC channel or in the command line arguments
             const config: TestConfig = { light: Harness.lightMode, listenForWork: true, runUnitTests: runners.length === 1 ? false : i === workerCount - 1 };
             const configPath = ts.combinePaths(taskConfigsFolder, `task-config${i}.json`);
             Harness.IO.writeFile(configPath, JSON.stringify(config));
             const child = fork(__filename, [`--config="${configPath}"`]);
             child.on("error", err => {
-                child.disconnect();
                 console.error("Unexpected error in child process:");
                 console.error(err);
                 return process.exit(2);
@@ -81,7 +113,6 @@
             child.on("message", (data: ParallelClientMessage) => {
                 switch (data.type) {
                     case "error": {
-                        child.disconnect();
                         console.error(`Test worker encounted unexpected error and was forced to close:
 Message: ${data.payload.error}
 Stack: ${data.payload.stack}`);
@@ -97,6 +128,7 @@
                         else {
                             passingFiles++;
                         }
+                        newPerfData[hashName(data.payload.runner, data.payload.file)] = data.payload.duration;

                         const progress = (failingFiles + passingFiles) / totalFiles;
                         if (progress >= nextProgress) {
@@ -106,20 +138,27 @@
                             updateProgress(progress, errorResults.length ? `${errorResults.length} failing` : `${totalPassing} passing`, errorResults.length ? "fail" : undefined);
                         }

-                        if (failingFiles + passingFiles === totalFiles) {
-                            // Done. Finished every task and collected results.
-                            child.send({ type: "close" });
-                            child.disconnect();
-                            return outputFinalResult();
-                        }
-                        if (tasks.length === 0) {
-                            // No more tasks to distribute
-                            child.send({ type: "close" });
-                            child.disconnect();
-                            return;
-                        }
                         if (data.type === "result") {
-                            child.send({ type: "test", payload: tasks.pop() });
+                            if (tasks.length === 0) {
+                                // No more tasks to distribute
+                                child.send({ type: "close" });
+                                closedWorkers++;
+                                if (closedWorkers === workerCount) {
+                                    outputFinalResult();
+                                }
+                                return;
+                            }
+                            // Send tasks in blocks if the tasks are small
+                            const taskList = [tasks.pop()];
+                            while (tasks.length && taskList.reduce((p, c) => p + c.size, 0) > chunkSize) {
+                                taskList.push(tasks.pop());
+                            }
+                            if (taskList.length === 1) {
+                                child.send({ type: "test", payload: taskList[0] });
+                            }
+                            else {
+                                child.send({ type: "batch", payload: taskList });
+                            }
                         }
                     }
                 }
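
Once the pre-packed batches drain, the host hands out the remaining (smallest) tasks one message at a time, grouping several pops into a single IPC message when the tasks are small. A simplified sketch of that dispatch, assuming the intent of the inline comment is to accumulate roughly chunkSize worth of cost per message (the committed loop condition reads `> chunkSize`); Task and Dispatch are illustrative types:

    interface Task { runner: string; file: string; size: number; }
    type Dispatch = { type: "test", payload: Task } | { type: "batch", payload: Task[] };

    const chunkSize = 1000; // ~1KB of file size, or ~1s of recorded duration

    // tasks is sorted ascending by size, so pop() yields the largest remaining.
    function nextDispatch(tasks: Task[]): Dispatch {
        const taskList: Task[] = [tasks.pop()!];
        let cost = taskList[0].size;
        while (tasks.length && cost < chunkSize) {
            const task = tasks.pop()!;
            taskList.push(task);
            cost += task.size;
        }
        return taskList.length === 1
            ? { type: "test", payload: taskList[0] }
            : { type: "batch", payload: taskList };
    }
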
@@ -130,12 +169,13 @@
         // It's only really worth doing an initial batching if there are a ton of files to go through
         if (totalFiles > 1000) {
             console.log("Batching initial test lists...");
-            const batches: { runner: TestRunnerKind, file: string, size: number }[][] = new Array(workerCount);
-            const doneBatching = new Array(workerCount);
+            const batches: { runner: TestRunnerKind, file: string, size: number }[][] = new Array(batchCount);
+            const doneBatching = new Array(batchCount);
+            let scheduledTotal = 0;
             batcher: while (true) {
-                for (let i = 0; i < workerCount; i++) {
+                for (let i = 0; i < batchCount; i++) {
                     if (tasks.length === 0) {
-                        // TODO: This indicates a particularly suboptimal packing
+                        console.log(`Suboptimal packing detected: no tests remain to be stolen. Reduce packing fraction from ${packfraction} to fix.`);
                         break batcher;
                     }
                     if (doneBatching[i]) {
@@ -145,26 +185,36 @@
                         batches[i] = [];
                     }
                     const total = batches[i].reduce((p, c) => p + c.size, 0);
-                    if (total >= batchSize && !doneBatching[i]) {
+                    if (total >= batchSize) {
                         doneBatching[i] = true;
                         continue;
                     }
-                    batches[i].push(tasks.pop());
+                    const task = tasks.pop();
+                    batches[i].push(task);
+                    scheduledTotal += task.size;
                 }
-                for (let j = 0; j < workerCount; j++) {
+                for (let j = 0; j < batchCount; j++) {
                     if (!doneBatching[j]) {
-                        continue;
+                        continue batcher;
                     }
                 }
                 break;
             }
-            console.log(`Batched into ${workerCount} groups with approximate total file sizes of ${Math.floor(batchSize)} bytes in each group.`);
+            const prefix = `Batched into ${batchCount} groups`;
+            if (unknownValue) {
+                console.log(`${prefix}. Unprofiled tests including ${unknownValue} will be run first.`);
+            }
+            else {
+                console.log(`${prefix} with approximate total ${perfData ? "time" : "file sizes"} of ${perfData ? ms(batchSize) : `${Math.floor(batchSize)} bytes`} in each group. (${(scheduledTotal / totalCost * 100).toFixed(1)}% of total tests batched)`);
+            }
             for (const worker of workers) {
-                const action: ParallelBatchMessage = { type: "batch", payload: batches.pop() };
-                if (!action.payload[0]) {
-                    throw new Error(`Tried to send invalid message ${action}`);
+                const payload = batches.pop();
+                if (payload) {
+                    worker.send({ type: "batch", payload });
+                }
+                else { // Unittest thread - send off just one test
+                    worker.send({ type: "test", payload: tasks.pop() });
                 }
-                worker.send(action);
             }
         }
         else {
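
To make the sizing concrete, a worked example with invented numbers: 8 worker threads, multiple runners, and 400 seconds of recorded test time.

    const workerCount = 8;
    const multipleRunners = true;
    // One fewer batch than threads: the final thread starts on unit tests.
    const batchCount = multipleRunners ? workerCount - 1 : workerCount; // 7
    const totalCost = 400000; // 400s of recorded durations, in ms
    const packfraction = 0.9;
    const batchSize = (totalCost / workerCount) * packfraction; // 45000 ms per batch
    // ~315s of tests are pre-batched across 7 threads; the remaining ~85s stays
    // in the shared queue for the unittest thread and early finishers to steal.
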
@@ -177,7 +227,6 @@
         updateProgress(0);
         let duration: number;

-        const ms = require("mocha/lib/ms");
         function completeBar() {
             const isPartitionFail = failingFiles !== 0;
             const summaryColor = isPartitionFail ? "fail" : "green";
@@ -235,6 +284,8 @@
             reporter.epilogue();
         }

+        Harness.IO.writeFile(perfdataFileName, JSON.stringify(newPerfData, null, 4)); // tslint:disable-line:no-null-keyword
+
         process.exit(errorResults.length);
     }

@@ -264,6 +315,38 @@
     let tty: { isatty(x: number): boolean };
     let isatty: boolean;

+    const s = 1000;
+    const m = s * 60;
+    const h = m * 60;
+    const d = h * 24;
+    function ms(ms: number) {
+        let result = "";
+        if (ms >= d) {
+            const count = Math.floor(ms / d);
+            result += count + "d";
+            ms -= count * d;
+        }
+        if (ms >= h) {
+            const count = Math.floor(ms / h);
+            result += count + "h";
+            ms -= count * h;
+        }
+        if (ms >= m) {
+            const count = Math.floor(ms / m);
+            result += count + "m";
+            ms -= count * m;
+        }
+        if (ms >= s) {
+            const count = Math.round(ms / s);
+            result += count + "s";
+            return result;
+        }
+        if (ms > 0) {
+            result += Math.round(ms) + "ms";
+        }
+        return result;
+    }
+
     function initializeProgressBarsDependencies() {
         Mocha = require("mocha");
         Base = Mocha.reporters.Base;
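
The inlined ms() replaces the former require("mocha/lib/ms") dependency and composes units down to the first sub-second remainder. Sample outputs, worked through from the function above (input values chosen for illustration):

    ms(450);      // "450ms"
    ms(1500);     // "2s"  - sub-minute values round to whole seconds
    ms(90061000); // "1d1h1m1s"
    ms(0);        // ""  - zero produces an empty string
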
@@ -286,7 +369,7 @@
         const close = options.close || "]";
         const complete = options.complete || "▬";
         const incomplete = options.incomplete || Base.symbols.dot;
-        const maxWidth = Base.window.width - open.length - close.length - 30;
+        const maxWidth = Base.window.width - open.length - close.length - 34;
         const width = minMax(options.width || maxWidth, 10, maxWidth);
         this._options = {
             open,

src/harness/parallel/shared.ts

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ namespace Harness.Parallel {

     export type ParallelErrorMessage = { type: "error", payload: { error: string, stack: string } } | never;
     export type ErrorInfo = ParallelErrorMessage["payload"] & { name: string };
-    export type ParallelResultMessage = { type: "result", payload: { passing: number, errors: ErrorInfo[] } } | never;
+    export type ParallelResultMessage = { type: "result", payload: { passing: number, errors: ErrorInfo[], duration: number, runner: TestRunnerKind, file: string } } | never;
     export type ParallelBatchProgressMessage = { type: "progress", payload: ParallelResultMessage["payload"] } | never;
     export type ParallelClientMessage = ParallelErrorMessage | ParallelResultMessage | ParallelBatchProgressMessage;
 }
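
With the widened payload, a result message now carries enough identity for the host to attribute the measured duration to a specific test file. A conforming value (field contents invented for illustration):

    const sample: ParallelResultMessage = {
        type: "result",
        payload: {
            passing: 12,
            errors: [],
            duration: 840, // wall-clock ms measured by the worker
            runner: "compiler", // a TestRunnerKind
            file: "tests/cases/compiler/bigProject.ts",
        },
    };
    // Host side: newPerfData[hashName(sample.payload.runner, sample.payload.file)] = sample.payload.duration;
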

src/harness/parallel/worker.ts

Lines changed: 10 additions & 3 deletions
@@ -12,9 +12,10 @@ namespace Harness.Parallel.Worker {
             testList.length = 0;
         }
         reportedUnitTests = true;
+        const start = +(new Date());
         runner.initializeTests();
         testList.forEach(({ name, callback, kind }) => executeCallback(name, callback, kind));
-        return { errors, passing };
+        return { errors, passing, duration: +(new Date()) - start };
     }


@@ -172,7 +173,13 @@
         });
         process.on("uncaughtException", error => {
             const message: ParallelErrorMessage = { type: "error", payload: { error: error.message, stack: error.stack } };
-            process.send(message);
+            try {
+                process.send(message);
+            }
+            catch (e) {
+                console.error(error);
+                throw error;
+            }
         });
         if (!runUnitTests) {
             // ensure unit tests do not get run
@@ -189,7 +196,7 @@
             }
             const instance = runners.get(runner);
             instance.tests = [file];
-            return resetShimHarnessAndExecute(instance);
+            return { ...resetShimHarnessAndExecute(instance), runner, file };
         }
     }
 }
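
The worker-side pattern in miniature: time the run, then spread the runner/file identity onto the result so the host can key its perf map. A generic sketch under those assumptions (timed is a hypothetical helper, not in the diff):

    // Wrap any producer of a result object and attach a wall-clock duration.
    function timed<T extends object>(run: () => T): T & { duration: number } {
        const start = +(new Date());
        const result = run();
        return { ...result, duration: +(new Date()) - start };
    }

    // Usage mirroring worker.ts:
    //   return { ...timed(() => resetShimHarnessAndExecute(instance)), runner, file };
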
