diff --git a/CHANGELOG.md b/CHANGELOG.md index 07d6e2869..a2edc903d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ Code freeze date: YYYY-MM-DD ### Added ### Changed +- Update `util.coordinates.match_centroids` and `util.coordinates.match_coordinates` so that they also +accept coordinates that are not defined in degree. +- Implement cheap test to check that input coordinates at least seem geographic for functions that require +geographic coordinates as input (e.g. `util.coordinates.dist_to_coast`, `util.coordinates.coord_on_land`). - `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012) ### Fixed diff --git a/climada/hazard/test/test_tc_tracks.py b/climada/hazard/test/test_tc_tracks.py index f1943c7f3..ced0dc1c7 100644 --- a/climada/hazard/test/test_tc_tracks.py +++ b/climada/hazard/test/test_tc_tracks.py @@ -1341,7 +1341,7 @@ def test_track_land_params(self): lon_test = np.array([170, 179.18, 180.05]) lat_test = np.array([-60, -16.56, -16.85]) on_land = np.array([False, True, True]) - lon_shift = np.array([-360, 0, 360]) + lon_shift = np.array([360, 360, 360]) # ensure both points are considered on land as is np.testing.assert_array_equal( u_coord.coord_on_land(lat=lat_test, lon=lon_test), on_land diff --git a/climada/hazard/trop_cyclone/trop_cyclone_windfields.py b/climada/hazard/trop_cyclone/trop_cyclone_windfields.py index f8ac09078..86a2e144a 100644 --- a/climada/hazard/trop_cyclone/trop_cyclone_windfields.py +++ b/climada/hazard/trop_cyclone/trop_cyclone_windfields.py @@ -902,6 +902,8 @@ def _coriolis_parameter(lat: np.ndarray) -> np.ndarray: cp : np.ndarray of same shape as input Coriolis parameter. """ + u_coord.check_if_geo_coords(lat, 0) + return 2 * V_ANG_EARTH * np.sin(np.radians(np.abs(lat))) diff --git a/climada/util/coordinates.py b/climada/util/coordinates.py index 5b2a72cf6..feee3fef5 100644 --- a/climada/util/coordinates.py +++ b/climada/util/coordinates.py @@ -85,6 +85,75 @@ are not considered.""" +def check_if_geo_coords(lat, lon): + """Check if latitude and longitude arrays are likely in geographic coordinates, + testing if min/max values are within -90 to 90 for latitude and -540 to 540 + for longitude. Lon coordinates of <-360 or >360 are allowed to cover cases + of objects being defined close to the 180 meridian for which longitude + intervals such as [179, 538] or [-538, -179] might be used. + + Parameters + ---------- + lat, lon : ndarrays of floats, same shape + Latitudes and longitudes of points. + + Returns + ------- + test : bool + True if lat/lon ranges seem to be in the geographic coordinates range, otherwise False. + """ + lat = np.array(lat) + lon = np.array(lon) + + # Check if latitude is within -90 to 90 and longitude is within -540 to 540 + # and extent are smaller than 180 and 360 respectively + like_geo = ( + lat.min() >= -91 and lat.max() <= 91 and lon.min() >= -541 and lon.max() <= 541 + ) and ((lat.max() - lat.min()) <= 181 and (lon.max() - lon.min()) <= 361) + + if not like_geo: + raise ValueError( + "Input lat and lon coordinates do not seem to correspond" + " to geographic coordinates in degrees. This can be because" + " total extents are > 180 for lat or > 360 for lon, lat coordinates" + " are outside of -90= 0 else 100) return epsg_utm_base + (math.floor((lon + 180) / 6) % 60) @@ -564,6 +640,7 @@ def dist_to_coast(coord_lat, lon=None, highres=False, signed=False): raise ValueError( f"Mismatching input coordinates size: {lat.size} != {lon.size}" ) + check_if_geo_coords(lat, lon) return dist_to_coast_nasa(lat, lon, highres=highres, signed=signed) @@ -679,6 +756,7 @@ def coord_on_land(lat, lon, land_geom=None): ) if lat.size == 0: return np.empty((0,), dtype=bool) + check_if_geo_coords(lat, lon) delta_deg = 1 lons = lon.copy() if land_geom is None: @@ -796,10 +874,7 @@ def get_country_geometries( out = out[country_mask] if extent: - if extent[1] - extent[0] > 360: - raise ValueError( - f"longitude extent range is greater than 360: {extent[0]} to {extent[1]}" - ) + check_if_geo_coords(extent[2:], extent[:2]) if extent[1] < extent[0]: raise ValueError( @@ -986,6 +1061,7 @@ def match_coordinates( coords, coords_to_assign, distance="euclidean", + unit="degree", threshold=NEAREST_NEIGHBOR_THRESHOLD, **kwargs, ): @@ -995,14 +1071,8 @@ def match_coordinates( nearest neighbor. If the distance to the nearest neighbor exceeds `threshold`, the index `-1` is assigned. - Currently, the nearest neighbor matching works with lat/lon coordinates only. However, you can - disable nearest neighbor matching by setting `threshold` to 0, in which case only exactly - matching coordinates are assigned to each other. - - Make sure that all coordinates are according to the same coordinate reference system. In case - of lat/lon coordinates, the "haversine" distance is able to correctly compute the distance - across the antimeridian. However, when exact matches are enforced with `threshold=0`, lat/lon - coordinates need to be given in the same longitudinal range (such as (-180, 180)). + You can disable nearest neighbor matching by setting `threshold` to 0, in which case + only exactly matching coordinates are assigned to each other. Parameters ---------- @@ -1016,9 +1086,12 @@ def match_coordinates( distance : str, optional Distance to use for non-exact matching. Possible values are "euclidean", "haversine" and "approx". Default: "euclidean" + unit : str, optional + Unit to use for non-exact matching. Possible values are "degree", "m", "km". + Default: "degree" threshold : float, optional If the distance to the nearest neighbor exceeds `threshold`, the index `-1` is assigned. - Set `threshold` to 0 to disable nearest neighbor matching. Default: 100 (km) + Set `threshold` to 0 to disable nearest neighbor matching. Default: 100 (km or crs.unit) kwargs: dict, optional Keyword arguments to be passed on to nearest-neighbor finding functions in case of non-exact matching with the specified `distance`. @@ -1037,6 +1110,17 @@ def match_coordinates( surface, in particular for higher latitude and distances larger than 100km. If more accuracy is needed, please use the 'haversine' distance metric. This however is slower for (quasi-)gridded data. + + The nearest neighbor matching works with lat/lon coordinates (in degrees), and projected coordinates + (e.g. meters, kilometers). However, projected coordinates, require "euclidean" distance to be used + for nearest neighbor matching. Furthermore, the handling of antimeridian crossing is disabled + (check_antimeridian=False) and a warning is raised when the user attempts to use this option with + a non-degree coordinates system. + + Make sure that all coordinates are according to the same coordinate reference system. In case + of lat/lon coordinates, the "haversine" distance is able to correctly compute the distance + across the antimeridian. However, when exact matches are enforced with `threshold=0`, lat/lon + coordinates need to be given in the same longitudinal range (such as (-180, 180)). """ if coords.shape[0] == 0: return np.array([]) @@ -1089,15 +1173,36 @@ def match_coordinates( # assign remaining coordinates to their geographically nearest neighbor if threshold > 0 and exact_assign_idx.size != coords_view.size: + if unit == "degree": + # check that coords at least seem geographic before proceeding to nearest neighbor + check_if_geo_coords(coords[:, 0], coords[:, 1]) + check_if_geo_coords(coords_to_assign[:, 0], coords_to_assign[:, 1]) + elif unit != "km": + LOGGER.warning( + "You are using coordinates systems defined in %s. " + "The following distance threshold will be used for coordinates " + "matching: %i %s. Please adapt the distance threshold if needed. ", + unit, + threshold, + unit, + ) + not_assigned_idx_mask = assigned_idx == -1 assigned_idx[not_assigned_idx_mask] = nearest_neighbor_funcs[distance]( - coords_to_assign, coords[not_assigned_idx_mask], threshold, **kwargs + coords_to_assign, + coords[not_assigned_idx_mask], + unit, + threshold, + **kwargs, ) return assigned_idx def match_centroids( - coord_gdf, centroids, distance="euclidean", threshold=NEAREST_NEIGHBOR_THRESHOLD + coord_gdf, + centroids, + distance="euclidean", + threshold=NEAREST_NEIGHBOR_THRESHOLD, ): """Assign to each gdf coordinate point its closest centroids's coordinate. If distances > threshold in points' distances, -1 is returned. @@ -1114,10 +1219,11 @@ def match_centroids( Possible values are "euclidean", "haversine" and "approx". Default: "euclidean" threshold : float, optional - If the distance (in km) to the nearest neighbor exceeds `threshold`, + If the distance (in km for coordinates defined in degrees, in the unit of + the crs otherwise) to the nearest neighbor exceeds `threshold`, the index `-1` is assigned. Set `threshold` to 0, to disable nearest neighbor matching. - Default: ``NEAREST_NEIGHBOR_THRESHOLD`` (100km) + Default: ``NEAREST_NEIGHBOR_THRESHOLD`` (100) See Also -------- @@ -1141,20 +1247,36 @@ def match_centroids( larger than 100km. If more accuracy is needed, please use 'haversine' distance metric. This however is slower for (quasi-)gridded data, and works only for non-gridded data. + + Projected coordinates (defined in units other than degrees) are supported. + However, nearest neighbor matching for projected coordinates is only possible + using 'euclidean' distance, and handling of coordinates crossing the antimeridian + is not implemented. """ try: if not equal_crs(coord_gdf.crs, centroids.crs): - raise ValueError("Set hazard and GeoDataFrame to same CRS first!") - except AttributeError: - # If the coord_gdf has no crs defined (or no valid geometry column), - # no error is raised and it is assumed that the user set the crs correctly - pass + raise ValueError( + "Please provide coord_gdf and centroids defined in a common CRS." + ) + if coord_gdf.crs is None or centroids.crs is None: + raise ValueError( + "Please provide coord_gdf and centroids with valid crs attributes." + ) + except AttributeError as exc: + # a crs attribute is needed for unit inference + raise ValueError( + "Please provide coord_gdf and centroids with valid crs attributes." + ) from exc + + # get unit of coordinate systems from axis of crs + unit = get_crs_unit(coord_gdf) assigned = match_coordinates( np.stack([coord_gdf.geometry.y.values, coord_gdf.geometry.x.values], axis=1), centroids.coord, distance=distance, + unit=unit, threshold=threshold, ) return assigned @@ -1170,11 +1292,11 @@ def _dist_sqr_approx(lats1, lons1, cos_lats1, lats2, lons2): def _nearest_neighbor_approx( - centroids, coordinates, threshold, check_antimeridian=True + centroids, coordinates, unit, threshold, check_antimeridian=True ): """Compute the nearest centroid for each coordinate using the - euclidean distance d = ((dlon)cos(lat))^2+(dlat)^2. For distant points - (e.g. more than 100km apart) use the haversine distance. + squared equirectangular approximation distance d = ((dlon)cos(lat))^2+(dlat)^2. + For distant points (e.g. more than 100km apart) use the haversine distance. Parameters ---------- @@ -1184,6 +1306,8 @@ def _nearest_neighbor_approx( coordinates : 2d array First column contains latitude, second column contains longitude. Each row is a geographic point + unit : str + Unit to use for non-exact matching. Only possible value is "degree" threshold : float distance threshold in km over which no neighbor will be found. Those are assigned with a -1 index @@ -1198,7 +1322,12 @@ def _nearest_neighbor_approx( np.array with as many rows as coordinates containing the centroids indexes """ - + # first check that unit is in degree + if unit != "degree": + raise ValueError( + "Only degree unit is supported for nearest neighbor matching using" + "'approx' distance." + ) # Compute only for the unique coordinates. Copy the results for the # not unique coordinates _, idx, inv = np.unique(coordinates, axis=0, return_index=True, return_inverse=True) @@ -1239,7 +1368,7 @@ def _nearest_neighbor_approx( return assigned -def _nearest_neighbor_haversine(centroids, coordinates, threshold): +def _nearest_neighbor_haversine(centroids, coordinates, unit, threshold): """Compute the neareast centroid for each coordinate using a Ball tree with haversine distance. Parameters @@ -1250,6 +1379,8 @@ def _nearest_neighbor_haversine(centroids, coordinates, threshold): coordinates : 2d array First column contains latitude, second column contains longitude. Each row is a geographic point + unit : str + Unit to use for non-exact matching. Only possible value is "degree" threshold : float distance threshold in km over which no neighbor will be found. Those are assigned with a -1 index @@ -1259,6 +1390,12 @@ def _nearest_neighbor_haversine(centroids, coordinates, threshold): np.array with as many rows as coordinates containing the centroids indexes """ + # first check that unit is in degree + if unit != "degree": + raise ValueError( + "Only coordinates in degree units are supported for nearest neighbor matching using" + "'haversine' distance." + ) # Construct tree from centroids tree = BallTree(np.radians(centroids), metric="haversine") # Select unique exposures coordinates @@ -1279,21 +1416,22 @@ def _nearest_neighbor_haversine(centroids, coordinates, threshold): # Raise a warning if the minimum distance is greater than the # threshold and set an unvalid index -1 - num_warn = np.sum(dist * EARTH_RADIUS_KM > threshold) + dist = dist * EARTH_RADIUS_KM + num_warn = np.sum(dist > threshold) if num_warn: LOGGER.warning( - "Distance to closest centroid is greater than %s" "km for %s coordinates.", + "Distance to closest centroid is greater than %i km for %i coordinates.", threshold, num_warn, ) - assigned[dist * EARTH_RADIUS_KM > threshold] = -1 + assigned[dist > threshold] = -1 # Copy result to all exposures and return value return assigned[inv] def _nearest_neighbor_euclidean( - centroids, coordinates, threshold, check_antimeridian=True + centroids, coordinates, unit, threshold, check_antimeridian=True ): """Compute the neareast centroid for each coordinate using a k-d tree. @@ -1305,6 +1443,8 @@ def _nearest_neighbor_euclidean( coordinates : 2d array First column contains latitude, second column contains longitude. Each row is a geographic point + unit : str + Unit to use for non-exact matching. Possible values are "degree", "m", "km". threshold : float distance threshold in km over which no neighbor will be found. Those are assigned with a -1 index @@ -1312,6 +1452,7 @@ def _nearest_neighbor_euclidean( If True, the nearest neighbor in a strip with lon size equal to threshold around the antimeridian is recomputed using the Haversine distance. The antimeridian is guessed from both coordinates and centroids, and is assumed equal to 0.5*(lon_max+lon_min) + 180. + Requires the coordinates to be in degrees. Default: True Returns @@ -1319,24 +1460,49 @@ def _nearest_neighbor_euclidean( np.array with as many rows as coordinates containing the centroids indexes """ + if ( + unit == "degree" + ): # if unit is in degree convert to radians for dist calculations + centroids = np.radians(centroids) + coordinates = np.radians(coordinates) # Construct tree from centroids - tree = scipy.spatial.KDTree(np.radians(centroids)) + tree = scipy.spatial.KDTree(centroids) # Select unique exposures coordinates _, idx, inv = np.unique(coordinates, axis=0, return_index=True, return_inverse=True) # query the k closest points of the n_points using dual tree - dist, assigned = tree.query(np.radians(coordinates[idx]), k=1, p=2, workers=-1) + dist, assigned = tree.query(coordinates[idx], k=1, p=2, workers=-1) + + if unit == "degree": + # convert back to degree for check antimeridian and convert distance + centroids = np.rad2deg(centroids) + coordinates = np.rad2deg(coordinates) + dist = dist * EARTH_RADIUS_KM + threshold_unit = "km" + else: + threshold_unit = ( + unit # the unit of the threshold is considered to be in input unit + ) + if check_antimeridian: + # if unit is not in degree, check_antimeridian is forced to False + check_antimeridian = False + LOGGER.warning( + "Handling of antimeridian crossing is not implemented for non-degree" + " coordinates ('check_antimeridian' has been set to False). Please use" + " degree-based coordinates system if you want to enable antimeridian crossing." + ) # Raise a warning if the minimum distance is greater than the # threshold and set an unvalid index -1 - num_warn = np.sum(dist * EARTH_RADIUS_KM > threshold) + num_warn = np.sum(dist > threshold) if num_warn: LOGGER.warning( - "Distance to closest centroid is greater than %s" "km for %s coordinates.", + "Distance to closest centroid is greater than %i %s for %i coordinates.", threshold, + threshold_unit, num_warn, ) - assigned[dist * EARTH_RADIUS_KM > threshold] = -1 + assigned[dist > threshold] = -1 if check_antimeridian: assigned = _nearest_neighbor_antimeridian( @@ -1389,7 +1555,7 @@ def _nearest_neighbor_antimeridian(centroids, coordinates, threshold, assigned): if np.any(cent_strip_bool): cent_strip = centroids[cent_strip_bool] strip_assigned = _nearest_neighbor_haversine( - cent_strip, coord_strip, threshold + cent_strip, coord_strip, "degree", threshold ) new_coords = cent_strip_bool.nonzero()[0][strip_assigned] new_coords[strip_assigned == -1] = -1 @@ -1595,6 +1761,8 @@ def get_country_code(lat, lon, gridded=False): if lat.size == 0: return np.empty((0,), dtype=int) LOGGER.info("Setting region_id %s points.", str(lat.size)) + # first check that input lat lon are geographic + check_if_geo_coords(lat, lon) if gridded: base_file = u_hdf5.read(NATEARTH_CENTROIDS[150]) meta, region_id = base_file["meta"], base_file["region_id"] diff --git a/climada/util/test/test_coordinates.py b/climada/util/test/test_coordinates.py index 56ba3af91..23c27e61f 100644 --- a/climada/util/test/test_coordinates.py +++ b/climada/util/test/test_coordinates.py @@ -38,7 +38,13 @@ import climada.util.coordinates as u_coord from climada import CONFIG from climada.hazard.base import Centroids -from climada.util.constants import DEF_CRS, DEMO_DIR, HAZ_DEMO_FL, ONE_LAT_KM +from climada.util.constants import ( + DEF_CRS, + DEMO_DIR, + EARTH_RADIUS_KM, + HAZ_DEMO_FL, + ONE_LAT_KM, +) DATA_DIR = CONFIG.util.test_data.dir() @@ -230,6 +236,47 @@ def def_ref_50(): ) +class TestGetUnitCoords(unittest.TestCase): + def create_mock_gdf(self, crs, num_points=5): + geometry = [ + shapely.Point(x, y) for x, y in zip(range(num_points), range(num_points)) + ] + gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) + return gdf + + def test_get_unit_degree(self): + """Test with a geodetic CRS in degree (EPSG:4326).""" + crs = "EPSG:4326" + gdf = self.create_mock_gdf(crs) + unit = u_coord.get_crs_unit(gdf) + self.assertEqual(unit, "degree", "Expected unit 'degree' for geodetic CRS.") + + def test_get_unit_projected_m(self): + """Test with a projected CRS in m (EPSG:3857).""" + crs = "EPSG:3857" + gdf = self.create_mock_gdf(crs) + unit = u_coord.get_crs_unit(gdf) + self.assertEqual(unit, "m", "Expected unit 'm' for projected CRS.") + + def test_get_unit_projected_km(self): + """Test with a projected CRS in km (EPSG:22300).""" + crs = "EPSG:22300" + gdf = self.create_mock_gdf(crs) + unit = u_coord.get_crs_unit(gdf) + self.assertEqual(unit, "km", "Expected unit 'km' for projected CRS.") + + def test_get_unit_invalid_crs(self): + """Test with an invalid CRS.""" + crs = "EPSG:8189" + gdf = self.create_mock_gdf(crs) + with self.assertRaises(ValueError) as cm: + u_coord.get_crs_unit(gdf) + self.assertIn( + "Unknown unit: US survey foot. Please provide a crs that has a unit of 'degree', 'metre' or 'kilometre'.", + str(cm.exception), + ) + + class TestDistance(unittest.TestCase): """Test distance functions.""" @@ -342,6 +389,33 @@ def data_arrays_resampling_demo(): class TestFunc(unittest.TestCase): """Test auxiliary functions""" + def test_check_geo_coords(self): + """Test the check_if_geo_coords function""" + # test correct geographical coords + lats_lons_true = (np.array([0, -2, 5]), np.array([-179, 175, 178])) + u_coord.check_if_geo_coords(lats_lons_true[0], lats_lons_true[1]) + + # test wrong geographical coords + lats_lons_wrong = ( + (np.array([0, 200, 5]), np.array([-179, 175, 178])), # test wrong lat + (np.array([0, -2, 5]), np.array([-400, 175, 176])), # test wrong lon + (np.array([0, -2, 5]), np.array([700, 1000, 800])), + (np.array([0, -2, 5]), np.array([-179, 175, 370])), # test wrong extent + ) + + for ilat, ilon in lats_lons_wrong: + with self.assertRaises(ValueError) as cm: + u_coord.check_if_geo_coords(ilat, ilon) + self.assertIn( + "Input lat and lon coordinates do not seem to correspond" + " to geographic coordinates in degrees. This can be because" + " total extents are > 180 for lat or > 360 for lon, lat coordinates" + " are outside of -90