From bb2fb7d53ccbbdb162f19f45ec4f701d50ff4546 Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Fri, 30 Sep 2022 15:47:06 -0500 Subject: [PATCH 1/6] Parameterize test_double_diff to assess behavior in both low and high memory scenarios --- .github/workflows/tests.yml | 90 +++++++++++++++++----------------- tests/benchmarks/test_array.py | 8 ++- 2 files changed, 51 insertions(+), 47 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 54ca6c629d..90f3907fdb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,52 +31,52 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.9"] - pytest_args: [tests] + pytest_args: [tests/benchmarks/test_array.py::test_double_diff] runtime-version: [upstream, latest, "0.0.4", "0.1.0"] - include: - # Run stability tests on Python 3.8 - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: upstream - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: latest - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: "0.0.4" - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: "0.1.0" - os: ubuntu-latest - # Run stability tests on Python 3.10 - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: upstream - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: latest - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: "0.0.4" - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: "0.1.0" - os: ubuntu-latest - # Run stability tests on Python Windows and MacOS (latest py39 only) - - pytest_args: tests/stability - python-version: "3.9" - runtime-version: latest - os: windows-latest - - pytest_args: tests/stability - python-version: "3.9" - runtime-version: latest - os: macos-latest + # include: + # # Run stability tests on Python 3.8 + # - pytest_args: tests/stability + # python-version: "3.8" + # runtime-version: upstream + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.8" + # runtime-version: latest + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.8" + # runtime-version: "0.0.4" + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.8" + # runtime-version: "0.1.0" + # os: ubuntu-latest + # # Run stability tests on Python 3.10 + # - pytest_args: tests/stability + # python-version: "3.10" + # runtime-version: upstream + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.10" + # runtime-version: latest + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.10" + # runtime-version: "0.0.4" + # os: ubuntu-latest + # - pytest_args: tests/stability + # python-version: "3.10" + # runtime-version: "0.1.0" + # os: ubuntu-latest + # # Run stability tests on Python Windows and MacOS (latest py39 only) + # - pytest_args: tests/stability + # python-version: "3.9" + # runtime-version: latest + # os: windows-latest + # - pytest_args: tests/stability + # python-version: "3.9" + # runtime-version: latest + # os: macos-latest steps: - name: Checkout diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py index 576a053afa..cadc388537 100644 --- a/tests/benchmarks/test_array.py +++ b/tests/benchmarks/test_array.py @@ -135,10 +135,14 @@ def pad_rechunk(arr): wait(arr_to_devnull(result), small_client, 10 * 60) -def test_double_diff(small_client): +@pytest.mark.parametrize("ds_size_fraction_of_cluster", [(1),(2)]) +def test_double_diff(ds_size_fraction_of_cluster, small_client): # Variant of https://github.com/dask/distributed/issues/6597 - memory = cluster_memory(small_client) # 76.66 GiB + # We speculate this test shows a lot of variability due to high memory pressure + # Here we reduce memory pressure to assess Dask's behavior. This will catch issues + # like https://github.com/dask/dask/issues/9488 + memory = cluster_memory(small_client) / ds_size_fraction_of_cluster # 76.66 GiB # TODO switch back to chunksizes in the `chunks=` argument everywhere # when https://github.com/dask/dask/issues/9488 is fixed cs = int((parse_bytes("20 MiB") / 8) ** (1 / 2)) From 2ab0a87007cd2725a1b64be979bfa3ea54609546 Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Fri, 30 Sep 2022 15:53:14 -0500 Subject: [PATCH 2/6] Revert to string literals in array chunking --- tests/benchmarks/test_array.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py index cadc388537..a3fb532db8 100644 --- a/tests/benchmarks/test_array.py +++ b/tests/benchmarks/test_array.py @@ -140,14 +140,12 @@ def test_double_diff(ds_size_fraction_of_cluster, small_client): # Variant of https://github.com/dask/distributed/issues/6597 # We speculate this test shows a lot of variability due to high memory pressure - # Here we reduce memory pressure to assess Dask's behavior. This will catch issues - # like https://github.com/dask/dask/issues/9488 + # Here we can assess Dask's behavior under both conditions. + # This will catch issues like https://github.com/dask/dask/issues/9488 memory = cluster_memory(small_client) / ds_size_fraction_of_cluster # 76.66 GiB - # TODO switch back to chunksizes in the `chunks=` argument everywhere - # when https://github.com/dask/dask/issues/9488 is fixed - cs = int((parse_bytes("20 MiB") / 8) ** (1 / 2)) - a = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=(cs, cs)) - b = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=(cs, cs)) + + a = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB")) + b = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB")) print_size_info(memory, memory, a, b) diff = a[1:, 1:] - b[:-1, :-1] From 813809044dacb10acfaa5032a1711fbe9db91ec4 Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Fri, 30 Sep 2022 15:56:29 -0500 Subject: [PATCH 3/6] Linting --- tests/benchmarks/test_array.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py index a3fb532db8..0dd230e12e 100644 --- a/tests/benchmarks/test_array.py +++ b/tests/benchmarks/test_array.py @@ -135,7 +135,7 @@ def pad_rechunk(arr): wait(arr_to_devnull(result), small_client, 10 * 60) -@pytest.mark.parametrize("ds_size_fraction_of_cluster", [(1),(2)]) +@pytest.mark.parametrize("ds_size_fraction_of_cluster", [(1), (2)]) def test_double_diff(ds_size_fraction_of_cluster, small_client): # Variant of https://github.com/dask/distributed/issues/6597 @@ -144,8 +144,12 @@ def test_double_diff(ds_size_fraction_of_cluster, small_client): # This will catch issues like https://github.com/dask/dask/issues/9488 memory = cluster_memory(small_client) / ds_size_fraction_of_cluster # 76.66 GiB - a = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB")) - b = da.random.random(scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB")) + a = da.random.random( + scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB") + ) + b = da.random.random( + scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB") + ) print_size_info(memory, memory, a, b) diff = a[1:, 1:] - b[:-1, :-1] From 54848b19046572d1053d47a8655454c498409dcf Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Fri, 30 Sep 2022 16:32:28 -0500 Subject: [PATCH 4/6] Linting --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 90f3907fdb..8306a70362 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,7 +31,7 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.9"] - pytest_args: [tests/benchmarks/test_array.py::test_double_diff] + pytest_args: [tests/benchmarks/test_array.py] runtime-version: [upstream, latest, "0.0.4", "0.1.0"] # include: # # Run stability tests on Python 3.8 From 026c05ef4a1448cf2117fd8d33549ac8a3d9f50a Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Tue, 4 Oct 2022 11:45:14 -0500 Subject: [PATCH 5/6] Update tests/benchmarks/test_array.py Co-authored-by: Naty Clementi --- tests/benchmarks/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py index 0dd230e12e..7f829b1a25 100644 --- a/tests/benchmarks/test_array.py +++ b/tests/benchmarks/test_array.py @@ -135,7 +135,7 @@ def pad_rechunk(arr): wait(arr_to_devnull(result), small_client, 10 * 60) -@pytest.mark.parametrize("ds_size_fraction_of_cluster", [(1), (2)]) +@pytest.mark.parametrize("ds_size_fraction_of_cluster", [1, 2]) def test_double_diff(ds_size_fraction_of_cluster, small_client): # Variant of https://github.com/dask/distributed/issues/6597 From aac37e0c94c6181d783af557bfbe922afe559a0d Mon Sep 17 00:00:00 2001 From: Greg Hayes Date: Tue, 4 Oct 2022 11:53:35 -0500 Subject: [PATCH 6/6] Add floor division on cluster_memory for target dataset size --- tests/benchmarks/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmarks/test_array.py b/tests/benchmarks/test_array.py index 7f829b1a25..ab64587fda 100644 --- a/tests/benchmarks/test_array.py +++ b/tests/benchmarks/test_array.py @@ -142,7 +142,7 @@ def test_double_diff(ds_size_fraction_of_cluster, small_client): # We speculate this test shows a lot of variability due to high memory pressure # Here we can assess Dask's behavior under both conditions. # This will catch issues like https://github.com/dask/dask/issues/9488 - memory = cluster_memory(small_client) / ds_size_fraction_of_cluster # 76.66 GiB + memory = cluster_memory(small_client) // ds_size_fraction_of_cluster # 76.66 GiB a = da.random.random( scaled_array_shape(memory, ("x", "x")), chunks=("20MiB", "20MiB")