Skip to content

Using regex package for match #408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions cf_xarray/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,11 @@ def _get_custom_criteria(
List[str], Variable name(s) in parent xarray object that matches axis, coordinate, or custom `key`
"""

try:
from regex import match as regex_match
except ImportError:
from re import match as regex_match # type: ignore

if isinstance(obj, DataArray):
obj = obj._to_temp_dataset()

Expand All @@ -223,13 +228,13 @@ def _get_custom_criteria(
if key in criteria_map:
for criterion, patterns in criteria_map[key].items():
for var in obj.variables:
if re.match(patterns, obj[var].attrs.get(criterion, "")):
if regex_match(patterns, obj[var].attrs.get(criterion, "")):
results.update((var,))
# also check name specifically since not in attributes
elif (
criterion == "name"
and isinstance(var, str)
and re.match(patterns, var)
and regex_match(patterns, var)
):
results.update((var,))
return list(results)
Expand Down
1 change: 1 addition & 0 deletions cf_xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,4 @@ def LooseVersion(vstring):
has_scipy, requires_scipy = _importorskip("scipy")
has_shapely, requires_shapely = _importorskip("shapely")
has_pint, requires_pint = _importorskip("pint")
has_regex, requires_regex = _importorskip("regex")
19 changes: 18 additions & 1 deletion cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@
rotds,
vert,
)
from . import raise_if_dask_computes, requires_cftime, requires_pint, requires_scipy
from . import (
raise_if_dask_computes,
requires_cftime,
requires_pint,
requires_regex,
requires_scipy,
)

mpl.use("Agg")

Expand Down Expand Up @@ -1585,6 +1591,17 @@ def test_custom_criteria() -> None:
assert_identical(ds.cf["temp"], ds["temperature"])


@requires_regex
def test_regex_match() -> None:
    """Check that a global inline flag placed mid-pattern still matches.

    The custom-criteria pattern below puts ``(?i)`` in its second alternative
    instead of at the very start of the expression. The test is gated by
    ``requires_regex`` so that it only runs when the optional ``regex``
    package is available.
    """
    # "(?i)" appears after the "|" on purpose: the point of this test is a
    # global flag that is NOT at the start of the pattern.
    vocab = {"temp": {"name": "tem|(?i)temp"}}
    ds = xr.Dataset()
    # Capitalised name, so the lookup only succeeds through the
    # case-insensitive alternative.
    ds["Tempblah"] = [0, 1, 2]
    with cf_xarray.set_options(custom_criteria=vocab):
        assert_identical(ds.cf["temp"], ds["Tempblah"])


def test_cf_standard_name_table_version() -> None:

url = (
Expand Down
1 change: 1 addition & 0 deletions ci/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies:
- pandas
- pooch
- pint
- regex
- furo
- pip:
- git+https://github.com/xarray-contrib/cf-xarray
Expand Down
1 change: 1 addition & 0 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- pandas
- pint
- pooch
- regex
- scipy
- shapely
- xarray
17 changes: 17 additions & 0 deletions doc/custom-criteria.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,20 @@ cfxr.set_options(custom_criteria=salt_criteria)

ds.cf[["salinity"]]
```

## More complex matches with `regex`

Here is an example of a more complicated custom criterion. It requires the [`regex`](https://github.com/mrabarnett/mrab-regex) package to be installed, because the pattern uses a global inline flag (here `(?i)`, for case-insensitive matching) somewhere other than the start of the expression; the standard-library `re` package deprecated that usage and, as of Python 3.11, rejects it. The custom criteria, called "vocab", map the variable alias "sea_ice_u" to any variable whose name – ignoring case – includes "sea" and "ice" and "u" but not "qc" or "status", or includes "sea" and "ice" and "x" and "vel" but not "qc" or "status".

```{code-cell}
import cf_xarray as cfxr
import xarray as xr

vocab = {"sea_ice_u": {"name": "(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)"}}
ds = xr.Dataset()
ds["sea_ice_velocity_x"] = [0,1,2]

with cfxr.set_options(custom_criteria=vocab):
seaiceu = ds.cf["sea_ice_u"]
seaiceu
```
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
What's New
----------

v0.7.8 (unreleased)
===================

- Optionally use the `regex` package to continue supporting global flags in regular expressions that are not at the start of the pattern (:pr:`408`). By `Kristen Thyng`_

v0.7.7 (Jan 14, 2023)
=====================

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
all = ["matplotlib", "pint", "shapely"]
all = ["matplotlib", "pint", "shapely", "regex"]

[project.urls]
homepage = "https://cf-xarray.readthedocs.io"
Expand Down