From 73b6e03d1c1bd899ca812aff60f93fdf941247fa Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Sat, 27 Jul 2024 14:22:05 +0200 Subject: [PATCH 1/9] Experiment: try not to include out-of-radar-mask values in the probability matching --- pysteps/blending/steps.py | 12 +++++--- pysteps/postprocessing/probmatching.py | 42 +++++++++++++++++--------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index f7068aeb7..315b61ce6 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1540,11 +1540,10 @@ def worker(j): # both the extrapolation cascade and the model (NWP) cascade and use # that for the probability matching if probmatching_method is not None and resample_distribution: - # deal with missing values arr1 = R_pm_ep[t_index] arr2 = precip_models_pm_temp[j] - arr2 = np.where(np.isnan(arr2), np.nanmin(arr2), arr2) - arr1 = np.where(np.isnan(arr1), arr2, arr1) + # arr2 = np.where(np.isnan(arr2), np.nanmin(arr2), arr2) + # arr1 = np.where(np.isnan(arr1), arr2, arr1) # resample weights based on cascade level 2 R_pm_resampled = probmatching.resample_distributions( first_array=arr1, @@ -1555,11 +1554,14 @@ def worker(j): R_pm_resampled = R_pm_blended.copy() if probmatching_method == "cdf": + # nan indices in the extrapolation nowcast + nan_indices = np.isnan(R_pm_ep[t_index]) # Adjust the CDF of the forecast to match the most recent - # benchmark rainfall field (R_pm_blended). If the forecast + # benchmark rainfall field (R_pm_blended). + # Rainfall outside the pure extrapolation domain is not taken into account. if np.any(np.isfinite(R_f_new)): R_f_new = probmatching.nonparam_match_empirical_cdf( - R_f_new, R_pm_resampled + R_f_new, R_pm_resampled, nan_indices ) R_pm_resampled = None elif probmatching_method == "mean": diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 7c96149e9..ba89ba2a6 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -52,7 +52,7 @@ def compute_empirical_cdf(bin_edges, hist): return cdf -def nonparam_match_empirical_cdf(initial_array, target_array): +def nonparam_match_empirical_cdf(initial_array, target_array, nan_indices=None): """ Matches the empirical CDF of the initial array with the empirical CDF of a target array. Initial ranks are conserved, but empirical distribution @@ -64,7 +64,9 @@ def nonparam_match_empirical_cdf(initial_array, target_array): initial_array: array_like The initial array whose CDF is to be matched with the target. target_array: array_like - The target array. + The target array + nan_indices: array_like, optional + Indices of pixels in the initial_array which are to be ignored (not rescaled) Returns ------- @@ -74,8 +76,6 @@ def nonparam_match_empirical_cdf(initial_array, target_array): if np.any(~np.isfinite(initial_array)): raise ValueError("initial array contains non-finite values") - if np.any(~np.isfinite(target_array)): - raise ValueError("target array contains non-finite values") if initial_array.size != target_array.size: raise ValueError( "dimension mismatch between initial_array and target_array: " @@ -86,11 +86,19 @@ def nonparam_match_empirical_cdf(initial_array, target_array): target_array = np.array(target_array) # zeros in initial array - zvalue = initial_array.min() - idxzeros = initial_array == zvalue + zvalue = np.nanmin(initial_array) # zeros in the target array - zvalue_trg = target_array.min() + zvalue_trg = np.nanmin(target_array) + + # Apply the masks to the arrays - set all values outside it to zero. + if nan_indices is not None: + initial_array_clipped = initial_array.copy() + initial_array_clipped[nan_indices] = zvalue + # replace nans in target array by zvalue_trg + target_array = np.where(np.isnan(target_array), zvalue_trg, target_array) + + idxzeros = initial_array_clipped == zvalue # adjust the fraction of rain in target distribution if the number of # nonzeros is greater than in the initial array @@ -103,16 +111,16 @@ def nonparam_match_empirical_cdf(initial_array, target_array): # flatten the arrays arrayshape = initial_array.shape target_array = target_array.flatten() - initial_array = initial_array.flatten() + initial_array_clipped = initial_array_clipped.flatten() # rank target values order = target_array.argsort() ranked = target_array[order] # rank initial values order - orderin = initial_array.argsort() - ranks = np.empty(len(initial_array), int) - ranks[orderin] = np.arange(len(initial_array)) + orderin = initial_array_clipped.argsort() + ranks = np.empty(len(initial_array_clipped), int) + ranks[orderin] = np.arange(len(initial_array_clipped)) # get ranked values from target and rearrange with the initial order output_array = ranked[ranks] @@ -123,6 +131,9 @@ def nonparam_match_empirical_cdf(initial_array, target_array): # read original zeros output_array[idxzeros] = zvalue_trg + # Put back the original values outside the nan-mask of the target array. + output_array[nan_indices] = initial_array[nan_indices] + return output_array @@ -287,16 +298,19 @@ def resample_distributions(first_array, second_array, probability_first_array): Raises ------ ValueError - If `first_array` and `second_array` do not have the same shape or if inputs contain NaNs. + If `first_array` and `second_array` do not have the same shape. """ # Valide inputs if first_array.shape != second_array.shape: raise ValueError("first_array and second_array must have the same shape") - if np.isnan(first_array).any() or np.isnan(second_array).any(): - raise ValueError("Input arrays must not contain NaNs") probability_first_array = np.clip(probability_first_array, 0.0, 1.0) + # Propagate the NaN values of the arrays to each other. + nanmask = np.logical_or(np.isnan(first_array), np.isnan(second_array)) + first_array[nanmask] = np.nan + second_array[nanmask] = np.nan + # Flatten and sort the arrays asort = np.sort(first_array, axis=None)[::-1] bsort = np.sort(second_array, axis=None)[::-1] From 35a09b987cdf39d982f6747a4915782b7ef9c78d Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Thu, 22 Aug 2024 09:39:43 +0200 Subject: [PATCH 2/9] Fix resample_distributions in case of integers (test case) and add test. --- pysteps/postprocessing/probmatching.py | 13 ++-- .../tests/test_postprocessing_probmatching.py | 71 +++++++++++++++---- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index ba89ba2a6..fba538da3 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -275,6 +275,7 @@ def resample_distributions(first_array, second_array, probability_first_array): """ Merges two distributions (e.g., from the extrapolation nowcast and NWP in the blending module) to effectively combine two distributions for probability matching without losing extremes. + Entries for which one array has a nan will not be included from the other array either. Parameters ---------- @@ -306,10 +307,13 @@ def resample_distributions(first_array, second_array, probability_first_array): raise ValueError("first_array and second_array must have the same shape") probability_first_array = np.clip(probability_first_array, 0.0, 1.0) - # Propagate the NaN values of the arrays to each other. - nanmask = np.logical_or(np.isnan(first_array), np.isnan(second_array)) - first_array[nanmask] = np.nan - second_array[nanmask] = np.nan + # Propagate the NaN values of the arrays to each other if there are any; convert to float to make sure this works. + nanmask = np.isnan(first_array) | np.isnan(second_array) + if np.any(nanmask): + first_array = first_array.astype(float) + first_array[nanmask] = np.nan + second_array = second_array.astype(float) + second_array[nanmask] = np.nan # Flatten and sort the arrays asort = np.sort(first_array, axis=None)[::-1] @@ -321,6 +325,7 @@ def resample_distributions(first_array, second_array, probability_first_array): csort = np.where(idxsamples, asort, bsort) csort = np.sort(csort)[::-1] + # Return the resampled array in descending order (starting with the nan values) return csort diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index fa27cdf39..87d21ab4d 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -39,19 +39,62 @@ def test_probability_one(self): ) assert np.array_equal(result, np.sort(first_array)[::-1]) - def test_nan_in_inputs(self): + def test_nan_in_arr1_prob_1(self): + array_with_nan = np.array([1, 3, np.nan, 7, 9]) + array_without_nan = np.array([2.0, 4, 6, 8, 10]) + probability_first_array = 1.0 + result = resample_distributions( + array_with_nan, array_without_nan, probability_first_array + ) + expected_result = np.array([np.nan, 9, 7, 3, 1], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_nan_in_arr1_prob_0(self): array_with_nan = np.array([1, 3, np.nan, 7, 9]) array_without_nan = np.array([2, 4, 6, 8, 10]) - probability_first_array = 0.6 - with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): - resample_distributions( - array_with_nan, array_without_nan, probability_first_array - ) - with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): - resample_distributions( - array_without_nan, array_with_nan, probability_first_array - ) - with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): - resample_distributions( - array_with_nan, array_with_nan, probability_first_array - ) + probability_first_array = 0.0 + result = resample_distributions( + array_with_nan, array_without_nan, probability_first_array + ) + expected_result = np.array([np.nan, 10, 8, 4, 2], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_nan_in_arr2_prob_1(self): + array_without_nan = np.array([1, 3, 5, 7, 9]) + array_with_nan = np.array([2.0, 4, 6, np.nan, 10]) + probability_first_array = 1.0 + result = resample_distributions( + array_without_nan, array_with_nan, probability_first_array + ) + expected_result = np.array([np.nan, 9, 5, 3, 1], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_nan_in_arr2_prob_0(self): + array_without_nan = np.array([1, 3, 5, 7, 9]) + array_with_nan = np.array([2, 4, 6, np.nan, 10]) + probability_first_array = 0.0 + result = resample_distributions( + array_without_nan, array_with_nan, probability_first_array + ) + expected_result = np.array([np.nan, 10, 6, 4, 2], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_nan_in_both_prob_1(self): + array1_with_nan = np.array([1, np.nan, np.nan, 7, 9]) + array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) + probability_first_array = 1.0 + result = resample_distributions( + array1_with_nan, array2_with_nan, probability_first_array + ) + expected_result = np.array([np.nan, np.nan, np.nan, 9, 1], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_nan_in_both_prob_0(self): + array1_with_nan = np.array([1, np.nan, np.nan, 7, 9]) + array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) + probability_first_array = 0.0 + result = resample_distributions( + array1_with_nan, array2_with_nan, probability_first_array + ) + expected_result = np.array([np.nan, np.nan, np.nan, 10, 2], dtype=float) + assert np.allclose(result, expected_result, equal_nan=True) From c307cbf5b9064d408274022453cda08888fecdcd Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Fri, 23 Aug 2024 08:02:05 +0200 Subject: [PATCH 3/9] Finish probability matching with nans Error out if mask doesn't cover all nans --- pysteps/postprocessing/probmatching.py | 58 +++++++++++++------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index fba538da3..fb61834e5 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -52,7 +52,7 @@ def compute_empirical_cdf(bin_edges, hist): return cdf -def nonparam_match_empirical_cdf(initial_array, target_array, nan_indices=None): +def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=None): """ Matches the empirical CDF of the initial array with the empirical CDF of a target array. Initial ranks are conserved, but empirical distribution @@ -65,7 +65,7 @@ def nonparam_match_empirical_cdf(initial_array, target_array, nan_indices=None): The initial array whose CDF is to be matched with the target. target_array: array_like The target array - nan_indices: array_like, optional + ignore_indices: array_like, optional Indices of pixels in the initial_array which are to be ignored (not rescaled) Returns @@ -74,53 +74,53 @@ def nonparam_match_empirical_cdf(initial_array, target_array, nan_indices=None): The matched array of the same shape as the initial array. """ - if np.any(~np.isfinite(initial_array)): - raise ValueError("initial array contains non-finite values") + if np.all(np.isnan(initial_array)): + raise ValueError("Initial array contains only nans.") if initial_array.size != target_array.size: raise ValueError( "dimension mismatch between initial_array and target_array: " f"initial_array.shape={initial_array.shape}, target_array.shape={target_array.shape}" ) - initial_array = np.array(initial_array) - target_array = np.array(target_array) + initial_array_copy = np.array(initial_array, dtype=float) + target_array = np.array(target_array, dtype=float) - # zeros in initial array - zvalue = np.nanmin(initial_array) - - # zeros in the target array - zvalue_trg = np.nanmin(target_array) + # Determine zero in initial array and set nans to zero + zvalue = np.nanmin(initial_array_copy) + if ignore_indices is not None: + initial_array_copy[ignore_indices] = zvalue + # Check if there are still nans left after setting the values at ignore_indices to zero. + if np.any(~np.isfinite(initial_array_copy)): + raise ValueError( + "Initial array contains non-finite values outside ignore_indices mask." + ) - # Apply the masks to the arrays - set all values outside it to zero. - if nan_indices is not None: - initial_array_clipped = initial_array.copy() - initial_array_clipped[nan_indices] = zvalue - # replace nans in target array by zvalue_trg - target_array = np.where(np.isnan(target_array), zvalue_trg, target_array) + idxzeros = initial_array_copy == zvalue - idxzeros = initial_array_clipped == zvalue + # Determine zero of target_array and set nans to zero. + zvalue_trg = np.nanmin(target_array) + target_array = np.where(np.isnan(target_array), zvalue_trg, target_array) # adjust the fraction of rain in target distribution if the number of - # nonzeros is greater than in the initial array - if np.sum(target_array > zvalue_trg) > np.sum(initial_array > zvalue): - war = np.sum(initial_array > zvalue) / initial_array.size + # nonzeros is greater than in the initial array (the lowest values will be set to zero) + if np.sum(target_array > zvalue_trg) > np.sum(initial_array_copy > zvalue): + war = np.sum(initial_array_copy > zvalue) / initial_array_copy.size p = np.percentile(target_array, 100 * (1 - war)) - target_array = target_array.copy() target_array[target_array < p] = zvalue_trg # flatten the arrays - arrayshape = initial_array.shape + arrayshape = initial_array_copy.shape target_array = target_array.flatten() - initial_array_clipped = initial_array_clipped.flatten() + initial_array_copy = initial_array_copy.flatten() # rank target values order = target_array.argsort() ranked = target_array[order] # rank initial values order - orderin = initial_array_clipped.argsort() - ranks = np.empty(len(initial_array_clipped), int) - ranks[orderin] = np.arange(len(initial_array_clipped)) + orderin = initial_array_copy.argsort() + ranks = np.empty(len(initial_array_copy), int) + ranks[orderin] = np.arange(len(initial_array_copy)) # get ranked values from target and rearrange with the initial order output_array = ranked[ranks] @@ -132,8 +132,8 @@ def nonparam_match_empirical_cdf(initial_array, target_array, nan_indices=None): output_array[idxzeros] = zvalue_trg # Put back the original values outside the nan-mask of the target array. - output_array[nan_indices] = initial_array[nan_indices] - + if ignore_indices is not None: + output_array[ignore_indices] = initial_array[ignore_indices] return output_array From cee2e92e193ddacbcb815f3f1d964a04628f7176 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Fri, 23 Aug 2024 08:03:35 +0200 Subject: [PATCH 4/9] Update comments on probability matching. --- pysteps/blending/steps.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 315b61ce6..7e4535dd2 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -1538,13 +1538,12 @@ def worker(j): # If probmatching_method is not None, resample the distribution from # both the extrapolation cascade and the model (NWP) cascade and use - # that for the probability matching + # that for the probability matching. if probmatching_method is not None and resample_distribution: arr1 = R_pm_ep[t_index] arr2 = precip_models_pm_temp[j] - # arr2 = np.where(np.isnan(arr2), np.nanmin(arr2), arr2) - # arr1 = np.where(np.isnan(arr1), arr2, arr1) - # resample weights based on cascade level 2 + # resample weights based on cascade level 2. + # Areas where one of the fields is nan are not included. R_pm_resampled = probmatching.resample_distributions( first_array=arr1, second_array=arr2, @@ -1556,8 +1555,8 @@ def worker(j): if probmatching_method == "cdf": # nan indices in the extrapolation nowcast nan_indices = np.isnan(R_pm_ep[t_index]) - # Adjust the CDF of the forecast to match the most recent - # benchmark rainfall field (R_pm_blended). + # Adjust the CDF of the forecast to match the resampled distribution combined from + # extrapolation and model fields. # Rainfall outside the pure extrapolation domain is not taken into account. if np.any(np.isfinite(R_f_new)): R_f_new = probmatching.nonparam_match_empirical_cdf( From c1557e6392a47abe2f51170b9857a8f8c52db0c2 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Fri, 23 Aug 2024 08:03:57 +0200 Subject: [PATCH 5/9] Add extensive tests for resampling and probability matching Especially with nans / ignore mask. --- .../tests/test_postprocessing_probmatching.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index 87d21ab4d..eebba357b 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -1,6 +1,7 @@ import numpy as np import pytest from pysteps.postprocessing.probmatching import resample_distributions +from pysteps.postprocessing.probmatching import nonparam_match_empirical_cdf class TestResampleDistributions: @@ -98,3 +99,95 @@ def test_nan_in_both_prob_0(self): ) expected_result = np.array([np.nan, np.nan, np.nan, 10, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) + + +class TestNonparamMatchEmpiricalCDF: + @pytest.fixture(autouse=True) + def setup(self): + # Set the seed for reproducibility + np.random.seed(42) + + def test_ignore_indices_with_nans_both(self): + initial_array = np.array([np.nan, np.nan, 6, 2, 0, 0, 0, 0, 0, 0]) + target_array = np.array([np.nan, np.nan, 9, 5, 4, 0, 0, 0, 0, 0]) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(initial_array) + ) + expected_result = np.array([np.nan, np.nan, 9, 5, 0, 0, 0, 0, 0, 0]) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_zeroes_initial(self): + initial_array = np.zeros(10) + target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + result = nonparam_match_empirical_cdf(initial_array, target_array) + expected_result = np.zeros(10) + assert np.allclose(result, expected_result) + + def test_nans_initial(self): + initial_array = np.array( + [0, 1, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan] + ) + target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + with pytest.raises( + ValueError, + match="Initial array contains non-finite values outside ignore_indices mask.", + ): + nonparam_match_empirical_cdf(initial_array, target_array) + + def test_all_nans_initial(self): + initial_array = np.full(10, np.nan) + target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + with pytest.raises(ValueError, match="Initial array contains only nans."): + nonparam_match_empirical_cdf(initial_array, target_array) + + def test_ignore_indices_nans_initial(self): + initial_array = np.array( + [0, 1, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan] + ) + target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(initial_array) + ) + expected_result = np.array( + [0, 7, 8, 9, 10, np.nan, np.nan, np.nan, np.nan, np.nan] + ) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_ignore_indices_nans_target(self): + # We expect the initial_array values for which ignore_indices is true to be conserved as-is. + initial_array = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + target_array = np.array( + [0, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan] + ) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(target_array) + ) + expected_result = np.array([0, 2, 3, 4, 4, 5, 6, 7, 8, 9]) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_more_zeroes_in_initial(self): + initial_array = np.array([1, 4, 0, 0, 0, 0, 0, 0, 0, 0]) + target_array = np.array([10, 8, 6, 4, 2, 0, 0, 0, 0, 0]) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(initial_array) + ) + expected_result = np.array([8, 10, 0, 0, 0, 0, 0, 0, 0, 0]) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_more_zeroes_in_initial_unsrt(self): + initial_array = np.array([1, 4, 0, 0, 0, 0, 0, 0, 0, 0]) + target_array = np.array([6, 4, 2, 0, 0, 0, 0, 0, 10, 8]) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(initial_array) + ) + expected_result = np.array([8, 10, 0, 0, 0, 0, 0, 0, 0, 0]) + assert np.allclose(result, expected_result, equal_nan=True) + + def test_more_zeroes_in_target(self): + initial_array = np.array([1, 3, 7, 5, 0, 0, 0, 0, 0, 0]) + target_array = np.array([10, 8, 0, 0, 0, 0, 0, 0, 0, 0]) + result = nonparam_match_empirical_cdf( + initial_array, target_array, ignore_indices=np.isnan(initial_array) + ) + expected_result = np.array([0, 0, 10, 8, 0, 0, 0, 0, 0, 0]) + assert np.allclose(result, expected_result, equal_nan=True) From 54afaf7979a75607d40992371c9714d116cf1b39 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 26 Aug 2024 22:51:23 +0200 Subject: [PATCH 6/9] Update documentation about ignore_indices. --- pysteps/postprocessing/probmatching.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index fb61834e5..c75048f43 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -66,7 +66,10 @@ def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=Non target_array: array_like The target array ignore_indices: array_like, optional - Indices of pixels in the initial_array which are to be ignored (not rescaled) + Indices of pixels in the initial_array which are to be ignored (not + rescaled) or an array of booleans with True at the pixel locations to + be ignored in initial_array and False elsewhere. + Returns ------- From 7d0b1270424608c0d298d333a668b4019438f77a Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 26 Aug 2024 23:15:00 +0200 Subject: [PATCH 7/9] Don't copy arrays ad nauseam in nonparam_match_empirical_cdf --- pysteps/postprocessing/probmatching.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index c75048f43..a75458152 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -111,10 +111,9 @@ def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=Non p = np.percentile(target_array, 100 * (1 - war)) target_array[target_array < p] = zvalue_trg - # flatten the arrays - arrayshape = initial_array_copy.shape - target_array = target_array.flatten() - initial_array_copy = initial_array_copy.flatten() + # flatten the arrays without copying them + target_array.reshape(-1) + initial_array_copy.reshape(-1) # rank target values order = target_array.argsort() From a01946c2d126bd05ea14bb6f1e716e957d63ab18 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 26 Aug 2024 23:27:39 +0200 Subject: [PATCH 8/9] Add missing declaration of array_shape before reshaping --- pysteps/postprocessing/probmatching.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index a75458152..72db9a8c9 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -112,6 +112,7 @@ def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=Non target_array[target_array < p] = zvalue_trg # flatten the arrays without copying them + arrayshape = initial_array_copy.shape target_array.reshape(-1) initial_array_copy.reshape(-1) From df2ce0ecd4bb2244b7c1890180e90ae3859b8143 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 26 Aug 2024 23:52:05 +0200 Subject: [PATCH 9/9] Don't forget to assign new views on flattened arrays; also add test for 2D data for pmatching --- pysteps/postprocessing/probmatching.py | 4 ++-- pysteps/tests/test_postprocessing_probmatching.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index 72db9a8c9..afc951542 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -113,8 +113,8 @@ def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=Non # flatten the arrays without copying them arrayshape = initial_array_copy.shape - target_array.reshape(-1) - initial_array_copy.reshape(-1) + target_array = target_array.reshape(-1) + initial_array_copy = initial_array_copy.reshape(-1) # rank target values order = target_array.argsort() diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py index eebba357b..8c7c12f4f 100644 --- a/pysteps/tests/test_postprocessing_probmatching.py +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -191,3 +191,10 @@ def test_more_zeroes_in_target(self): ) expected_result = np.array([0, 0, 10, 8, 0, 0, 0, 0, 0, 0]) assert np.allclose(result, expected_result, equal_nan=True) + + def test_2dim_array(self): + initial_array = np.array([[1, 3, 5], [11, 9, 7]]) + target_array = np.array([[2, 4, 6], [8, 10, 12]]) + result = nonparam_match_empirical_cdf(initial_array, target_array) + expected_result = np.array([[2, 4, 6], [12, 10, 8]]) + assert np.allclose(result, expected_result, equal_nan=True)