diff --git a/changelog/812.feature b/changelog/812.feature new file mode 100644 index 00000000..1d0eb0ee --- /dev/null +++ b/changelog/812.feature @@ -0,0 +1,22 @@ +Partially restore old initial batch distribution algorithm in ``LoadScheduling``. + +pytest orders tests for optimal sequential execution - i. e. avoiding +unnecessary setup and teardown of fixtures. So executing tests in consecutive +chunks is important for optimal performance. + +In v1.14, initial test distribution in ``LoadScheduling`` was changed to +round-robin, optimized for the corner case, when the number of tests is less +than ``2 * number of nodes``. At the same time, it became worse for all other +cases. + +For example: if some tests use some "heavy" fixture, and these tests fit into +the initial batch, with round-robin distribution the fixture will be created +``min(n_tests, n_workers)`` times, no matter how many other tests there are. + +With the old algorithm (before v1.14), if there are enough tests not using +the fixture, the fixture was created only once. + +So restore the old behavior for typical cases where the number of tests is +much greater than the number of workers (or, strictly speaking, when there +are at least 2 tests for every node). + diff --git a/src/xdist/scheduler/load.py b/src/xdist/scheduler/load.py index f32caa55..f3bef40b 100644 --- a/src/xdist/scheduler/load.py +++ b/src/xdist/scheduler/load.py @@ -248,13 +248,27 @@ def schedule(self): # Send a batch of tests to run. If we don't have at least two # tests per node, we have to send them all so that we can send # shutdown signals and get all nodes working. - initial_batch = max(len(self.pending) // 4, 2 * len(self.nodes)) - - # distribute tests round-robin up to the batch size - # (or until we run out) - nodes = cycle(self.nodes) - for i in range(initial_batch): - self._send_tests(next(nodes), 1) + if len(self.pending) < 2 * len(self.nodes): + # Distribute tests round-robin. Try to load all nodes if there are + # enough tests. The other branch tries sends at least 2 tests + # to each node - which is suboptimal when you have less than + # 2 * len(nodes) tests. + nodes = cycle(self.nodes) + for i in range(len(self.pending)): + self._send_tests(next(nodes), 1) + else: + # Send batches of consecutive tests. By default, pytest sorts tests + # in order for optimal single-threaded execution, minimizing the + # number of necessary fixture setup/teardown. Try to keep that + # optimal order for every worker. + + # how many items per node do we have about? + items_per_node = len(self.collection) // len(self.node2pending) + # take a fraction of tests for initial distribution + node_chunksize = max(items_per_node // 4, 2) + # and initialize each node with a chunk of tests + for node in self.nodes: + self._send_tests(node, node_chunksize) if not self.pending: # initial distribution sent all tests, start node shutdown diff --git a/testing/test_dsession.py b/testing/test_dsession.py index d3a57152..86273b8c 100644 --- a/testing/test_dsession.py +++ b/testing/test_dsession.py @@ -115,18 +115,18 @@ def test_schedule_batch_size(self, pytester: pytest.Pytester) -> None: # assert not sched.tests_finished sent1 = node1.sent sent2 = node2.sent - assert sent1 == [0, 2] - assert sent2 == [1, 3] + assert sent1 == [0, 1] + assert sent2 == [2, 3] assert sched.pending == [4, 5] assert sched.node2pending[node1] == sent1 assert sched.node2pending[node2] == sent2 assert len(sched.pending) == 2 sched.mark_test_complete(node1, 0) - assert node1.sent == [0, 2, 4] + assert node1.sent == [0, 1, 4] assert sched.pending == [5] - assert node2.sent == [1, 3] - sched.mark_test_complete(node1, 2) - assert node1.sent == [0, 2, 4, 5] + assert node2.sent == [2, 3] + sched.mark_test_complete(node1, 1) + assert node1.sent == [0, 1, 4, 5] assert not sched.pending def test_schedule_fewer_tests_than_nodes(self, pytester: pytest.Pytester) -> None: