From 7f85422ff718309b6d73237111ae9218b01b2130 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 23 Oct 2025 11:35:20 +0100 Subject: [PATCH 1/3] Try using array hashes for array_equal of lazy arrays: N.B. should accelerate aux-coord comparison. --- lib/iris/util.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index b3ce7941c5..e0df5893fd 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -509,20 +509,30 @@ def normalise_array(array): eq = array1.shape == array2.shape if eq: if is_lazy_data(array1) or is_lazy_data(array2): - # Use a separate map and reduce operation to avoid running out of memory. - ndim = array1.ndim - indices = tuple(range(ndim)) - eq = da.blockwise( - _masked_array_equal, - indices, - array1, - indices, - array2, - indices, - dtype=bool, - meta=np.empty((0,) * ndim, dtype=bool), - equal_nan=withnans, - ).all() + # Compare lazy arrays by hashing, and cache the hashes... + def array_hash(array): + if not hasattr(array, "_iris_array_hash"): + from iris._concatenate import _hash_array + + hash = _hash_array(array) + array._iris_array_hash = hash + return array._iris_array_hash + + eq = array_hash(array1) == array_hash(array2) + # # Use a separate map and reduce operation to avoid running out of memory. 
+ # ndim = array1.ndim + # indices = tuple(range(ndim)) + # eq = da.blockwise( + # _masked_array_equal, + # indices, + # array1, + # indices, + # array2, + # indices, + # dtype=bool, + # meta=np.empty((0,) * ndim, dtype=bool), + # equal_nan=withnans, + # ).all() else: eq = _masked_array_equal(array1, array2, equal_nan=withnans).all() From 3445020b993210e6e3f7549d1d2713369bc5532e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 23 Oct 2025 11:42:14 +0100 Subject: [PATCH 2/3] add specific test --- lib/iris/tests/unit/coords/test_AuxCoord.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py index 300238b3c1..4b40c6e2d4 100644 --- a/lib/iris/tests/unit/coords/test_AuxCoord.py +++ b/lib/iris/tests/unit/coords/test_AuxCoord.py @@ -774,3 +774,24 @@ def test_nanpoints_eq_copy(self): def test_nanbounds_eq_self(self): co1 = AuxCoord([15.0, 25.0], bounds=[[14.0, 16.0], [24.0, np.nan]]) assert co1 == co1 + + def test_lazy_compares_via_hash(self): + def lazify(coord): + coord.bounds = coord.lazy_bounds() + + co1 = AuxCoord([15.0, 25.0]) + co2 = AuxCoord([15.0, 25.001]) + co1.points = co1.lazy_points() + co2.points = co2.lazy_points() + assert co1.has_lazy_points() + assert co2.has_lazy_points() + assert not hasattr(co1.core_points(), "_iris_array_hash") + assert not hasattr(co2.core_points(), "_iris_array_hash") + + eq = co1 == co2 + assert not eq + + assert co1.has_lazy_points() + assert co2.has_lazy_points() + assert hasattr(co1.core_points(), "_iris_array_hash") + assert hasattr(co2.core_points(), "_iris_array_hash") From 852649b7dc30b53f51b9a46e52b003dd8d6407c6 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 23 Oct 2025 12:02:52 +0100 Subject: [PATCH 3/3] Only cache hashes on *lazy* arrays. 
--- lib/iris/tests/unit/coords/test_AuxCoord.py | 18 ++++++++++++------ lib/iris/util.py | 10 +++++++--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py index 4b40c6e2d4..20fd16377b 100644 --- a/lib/iris/tests/unit/coords/test_AuxCoord.py +++ b/lib/iris/tests/unit/coords/test_AuxCoord.py @@ -775,23 +775,29 @@ def test_nanbounds_eq_self(self): co1 = AuxCoord([15.0, 25.0], bounds=[[14.0, 16.0], [24.0, np.nan]]) assert co1 == co1 - def test_lazy_compares_via_hash(self): + @pytest.mark.parametrize("bothlazy", [True, False], ids=["bothlazy", "onelazy"]) + def test_lazy_compares_via_hash(self, bothlazy): def lazify(coord): coord.bounds = coord.lazy_bounds() co1 = AuxCoord([15.0, 25.0]) co2 = AuxCoord([15.0, 25.001]) + co1.points = co1.lazy_points() - co2.points = co2.lazy_points() + if bothlazy: + co2.points = co2.lazy_points() assert co1.has_lazy_points() - assert co2.has_lazy_points() + assert co2.has_lazy_points() == bothlazy + assert not hasattr(co1.core_points(), "_iris_array_hash") - assert not hasattr(co2.core_points(), "_iris_array_hash") + if bothlazy: + assert not hasattr(co2.core_points(), "_iris_array_hash") eq = co1 == co2 assert not eq assert co1.has_lazy_points() - assert co2.has_lazy_points() assert hasattr(co1.core_points(), "_iris_array_hash") - assert hasattr(co2.core_points(), "_iris_array_hash") + if bothlazy: + assert co2.has_lazy_points() + assert hasattr(co2.core_points(), "_iris_array_hash") diff --git a/lib/iris/util.py b/lib/iris/util.py index e0df5893fd..dc6dc58586 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -511,12 +511,16 @@ def normalise_array(array): if is_lazy_data(array1) or is_lazy_data(array2): # Compare lazy arrays by hashing, and cache the hashes... 
def array_hash(array): - if not hasattr(array, "_iris_array_hash"): + if hasattr(array, "_iris_array_hash"): + hash = array._iris_array_hash + else: from iris._concatenate import _hash_array hash = _hash_array(array) - array._iris_array_hash = hash - return array._iris_array_hash + if isinstance(array, da.Array): + # Can't save hashes on a numpy array, but CAN on a Dask array + array._iris_array_hash = hash + return hash eq = array_hash(array1) == array_hash(array2) # # Use a separate map and reduce operation to avoid running out of memory.