Skip to content

2D bounds - simple version #370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions cf_xarray/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,17 +461,37 @@ def wrapper(obj: DataArray | Dataset, key: str):
}


def _guess_bounds_dim(da, dim=None):
def _guess_bounds_dim(da, dim=None, out_dim="bounds"):
"""
Guess bounds values given a 1D coordinate variable.
Guess bounds values given a 1D or 2D coordinate variable.
Assumes equal spacing on either side of the coordinate label.
This is a coarse approximation, especially for 2D bounds on curvilinear grids.
"""
if dim is None:
if da.ndim != 1:
if da.ndim not in [1, 2]:
raise ValueError(
f"If dim is None, variable {da.name} must be 1D. Received {da.ndim}D variable instead."
f"If dim is None, variable {da.name} must be 1D or 2D. Received {da.ndim}D variable instead."
)
(dim,) = da.dims
dim = da.dims
if not isinstance(dim, str):
if len(dim) > 2:
raise NotImplementedError(
"Adding bounds with more than 2 dimensions is not supported."
)
elif len(dim) == 2:
daX = _guess_bounds_dim(da, dim[0]).rename(bounds="Xbnds")
daXY = _guess_bounds_dim(daX, dim[1]).rename(bounds="Ybnds")
return xr.concat(
[
daXY.isel(Xbnds=0, Ybnds=0),
daXY.isel(Xbnds=0, Ybnds=1),
daXY.isel(Xbnds=1, Ybnds=1),
daXY.isel(Xbnds=1, Ybnds=0),
],
out_dim,
)
else:
dim = dim[0]
if dim not in da.dims:
(dim,) = da.cf.axes[dim]
if dim not in da.coords:
Expand All @@ -482,7 +502,7 @@ def _guess_bounds_dim(da, dim=None):
diff = da.diff(dim)
lower = da - diff / 2
upper = da + diff / 2
bounds = xr.concat([lower, upper], dim="bounds")
bounds = xr.concat([lower, upper], dim=out_dim)

first = (bounds.isel({dim: 0}) - diff.isel({dim: 0})).assign_coords(
{dim: da[dim][0]}
Expand Down Expand Up @@ -2169,18 +2189,28 @@ def get_bounds_dim_name(self, key: str) -> str:
assert self._obj.sizes[bounds_dim] in [2, 4]
return bounds_dim

def add_bounds(self, keys: str | Iterable[str], *, dim=None):
def add_bounds(
self,
keys: str | Iterable[str],
*,
dim: str | Iterable[str] | None = None,
output_dim: str = "bounds",
):
"""
Returns a new object with bounds variables. The bounds values are guessed assuming
equal spacing on either side of a coordinate label.
equal spacing on either side of a coordinate label. The linear estimation is only a
coarse approximation, especially for 2D bounds on curvilinear grids. It is always better to use
bounds generated as part of the grid creation process. This method is purely for convenience.

Parameters
----------
keys : str or Iterable[str]
Either a single variable name or a list of variable names.
dim : str, optional
Core dimension along whch to estimate bounds. If None, ``keys``
must refer to 1D variables only.
dim : str or Iterable[str], optional
Core dimension(s) along which to estimate bounds. For 2D bounds, it can
be a list of 2 dimension names.
output_dim : str
The name of the bounds dimension to add.

Returns
-------
Expand Down Expand Up @@ -2226,9 +2256,17 @@ def add_bounds(self, keys: str | Iterable[str], *, dim=None):
bname = f"{var}_bounds"
if bname in obj.variables:
raise ValueError(f"Bounds variable name {bname!r} will conflict!")
obj.coords[bname] = _guess_bounds_dim(
obj[var].reset_coords(drop=True), dim=dim
out = _guess_bounds_dim(
obj[var].reset_coords(drop=True), dim=dim, out_dim=output_dim
)
if output_dim in obj.dims and (new := out[output_dim].size) != (
old := obj[output_dim].size
):
raise ValueError(
f"The `{output_dim}` dimension already exists but has a different length than the new one "
f"({old} vs {new}). Please provide another bound dimension name with `output_dim`."
)
obj.coords[bname] = out
obj[var].attrs["bounds"] = bname

return self._maybe_to_dataarray(obj)
Expand Down
19 changes: 16 additions & 3 deletions cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from xarray.testing import assert_allclose, assert_identical

import cf_xarray # noqa
from cf_xarray.helpers import vertices_to_bounds
from cf_xarray.utils import parse_cf_standard_name_table

from ..datasets import (
Expand Down Expand Up @@ -799,26 +800,38 @@ def test_add_bounds_multiple():


def test_add_bounds_nd_variable():

ds = xr.Dataset(
{"z": (("x", "y"), np.arange(12).reshape(4, 3))},
coords={"x": np.arange(4), "y": np.arange(3)},
)

# 2D
expected = (
vertices_to_bounds(
np.arange(0, 13, 3).reshape(5, 1) + np.arange(-2, 2).reshape(1, 4)
)
.rename("z_bounds")
.assign_coords(**ds.coords)
)
actual = ds.cf.add_bounds("z").z_bounds.reset_coords(drop=True)
xr.testing.assert_identical(actual, expected)

# 1D
expected = (
xr.concat([ds.z - 1.5, ds.z + 1.5], dim="bounds")
.rename("z_bounds")
.transpose("bounds", "y", "x")
)
with pytest.raises(ValueError):
ds.cf.add_bounds("z")

actual = ds.cf.add_bounds("z", dim="x").z_bounds.reset_coords(drop=True)
xr.testing.assert_identical(expected, actual)

with pytest.raises(NotImplementedError):
ds.drop_vars("x").cf.add_bounds("z", dim="x")

with pytest.raises(ValueError, match="The `bounds` dimension already exists"):
ds.cf.add_bounds("z").cf.add_bounds("x")


def test_bounds():
ds = airds.copy(deep=False).cf.add_bounds("lat")
Expand Down
Binary file added doc/2D_bounds_error.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions doc/bounds.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@ See
1. {py:func}`Dataset.cf.add_bounds`,
1. {py:func}`cf_xarray.bounds_to_vertices`,
1. {py:func}`cf_xarray.vertices_to_bounds`

`cf_xarray` supports parsing [coordinate bounds](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#cell-boundaries) as encoded in the CF `bounds` attribute. A useful feature for incomplete datasets is the automatic bounds estimation available through `cf.add_bounds`. This method estimates the missing bounds by finding the midpoints between elements of the given coordinate, and by extrapolating to find the outer bounds of the grid. This linear estimation works well with rectilinear grids, but it is only a coarse approximation for curvilinear and simple irregular grids.

As an example, we present a "rotated pole" grid. It is defined on a rotated rectilinear grid which uses the `rlat` and `rlon` 1D coordinates, over North America at a resolution of 0.44°. The dataset comes with 2D `lat` and `lon` coordinates. `cf_xarray` will estimate the bounds by linear interpolation (extrapolation at the edges) of the existing `lon` and `lat`, which yields good results on parts of the grid where the rotation is small. However, the error is larger in other places, as seen when visualizing the distance in degrees between the estimated bounds and the true bounds.

![2d bounds error](2D_bounds_error.png)
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
What's New
----------

v0.7.5 (unreleased)
===================
- ``cf.add_bounds`` can estimate 2D bounds using an approximate linear interpolation (:pr:`370`).
By `Pascal Bourgault`_.

v0.7.4 (July 14, 2022)
======================

Expand Down