diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 940f62cd..cc8617d9 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -207,6 +207,11 @@ def _get_custom_criteria( List[str], Variable name(s) in parent xarray object that matches axis, coordinate, or custom `key` """ + try: + from regex import match as regex_match + except ImportError: + from re import match as regex_match # type: ignore + if isinstance(obj, DataArray): obj = obj._to_temp_dataset() @@ -223,13 +228,13 @@ def _get_custom_criteria( if key in criteria_map: for criterion, patterns in criteria_map[key].items(): for var in obj.variables: - if re.match(patterns, obj[var].attrs.get(criterion, "")): + if regex_match(patterns, obj[var].attrs.get(criterion, "")): results.update((var,)) # also check name specifically since not in attributes elif ( criterion == "name" and isinstance(var, str) - and re.match(patterns, var) + and regex_match(patterns, var) ): results.update((var,)) return list(results) diff --git a/cf_xarray/tests/__init__.py b/cf_xarray/tests/__init__.py index eca25d79..3ffcafcd 100644 --- a/cf_xarray/tests/__init__.py +++ b/cf_xarray/tests/__init__.py @@ -67,3 +67,4 @@ def LooseVersion(vstring): has_scipy, requires_scipy = _importorskip("scipy") has_shapely, requires_shapely = _importorskip("shapely") has_pint, requires_pint = _importorskip("pint") +has_regex, requires_regex = _importorskip("regex") diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 4d97ec53..118558b8 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -34,7 +34,13 @@ rotds, vert, ) -from . import raise_if_dask_computes, requires_cftime, requires_pint, requires_scipy +from . 
import ( + raise_if_dask_computes, + requires_cftime, + requires_pint, + requires_regex, + requires_scipy, +) mpl.use("Agg") @@ -1585,6 +1591,17 @@ def test_custom_criteria() -> None: assert_identical(ds.cf["temp"], ds["temperature"]) +@requires_regex +def test_regex_match(): + # test that having a global regex expression flag later in the expression will work if + # regex is found + vocab = {"temp": {"name": "tem|(?i)temp"}} + ds = xr.Dataset() + ds["Tempblah"] = [0, 1, 2] + with cf_xarray.set_options(custom_criteria=vocab): + assert_identical(ds.cf["temp"], ds["Tempblah"]) + + def test_cf_standard_name_table_version() -> None: url = ( diff --git a/ci/doc.yml b/ci/doc.yml index 56a435e2..4b8620ce 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -18,6 +18,7 @@ dependencies: - pandas - pooch - pint + - regex - furo - pip: - git+https://github.com/xarray-contrib/cf-xarray diff --git a/ci/environment.yml b/ci/environment.yml index 8b138c6d..39a284bb 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -13,6 +13,7 @@ dependencies: - pandas - pint - pooch + - regex - scipy - shapely - xarray diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index e4c8e12d..f6daa08d 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -102,3 +102,20 @@ cfxr.set_options(custom_criteria=salt_criteria) ds.cf[["salinity"]] ``` + +## More complex matches with `regex` + +Here is an example of a more complicated custom criteria, which requires the [`regex`](https://github.com/mrabarnett/mrab-regex) package to be installed because it relies on a behavior (global flags such as "(?i)", for case-insensitive matching, appearing somewhere other than the start of the pattern) that was recently deprecated in the `re` package. The custom criteria, called "vocab", maps the variable alias "sea_ice_u" to any variable whose name, matched case-insensitively, includes "sea", "ice", and "u" but not "qc" or "status", or includes "sea", "ice", "x", and "vel" but not "qc" or "status".
+ ```{code-cell} +import cf_xarray as cfxr +import xarray as xr + +vocab = {"sea_ice_u": {"name": "(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)"}} +ds = xr.Dataset() +ds["sea_ice_velocity_x"] = [0,1,2] + +with cfxr.set_options(custom_criteria=vocab): + seaiceu = ds.cf["sea_ice_u"] +seaiceu +``` diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c4c391c8..4471ea67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,6 +3,11 @@ What's New ---------- +v0.7.8 (unreleased) +=================== + +- Optionally use the ``regex`` package to continue supporting global flags in regular expressions that are not at the start of the pattern (:pr:`408`). By `Kristen Thyng`_. + v0.7.7 (Jan 14, 2023) ===================== diff --git a/pyproject.toml b/pyproject.toml index 5fdb5348..a4ddb772 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -all = ["matplotlib", "pint", "shapely"] +all = ["matplotlib", "pint", "shapely", "regex"] [project.urls] homepage = "https://cf-xarray.readthedocs.io"