Skip to content

Commit 8b542f8

Browse files
committed
initial work on Dataset.query
1 parent 48378c4 commit 8b542f8

File tree

2 files changed

+88
-0
lines changed

2 files changed

+88
-0
lines changed

xarray/core/dataset.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6980,5 +6980,36 @@ def argmax(self, dim=None, axis=None, **kwargs):
69806980
"Dataset.argmin() with a sequence or ... for dim"
69816981
)
69826982

6983+
def query(
    self,
    queries: Mapping[Hashable, Any] = None,
    parser: str = "pandas",
    engine: str = None,
    missing_dims: str = "raise",
    **queries_kwargs: Any,
) -> "Dataset":
    """Index this dataset along dimensions using string expressions.

    Each query maps a dimension name to an expression string. Every
    expression is evaluated with ``pd.eval`` using this dataset as the
    resolver, and the evaluated results are handed to ``isel`` as the
    indexers for the corresponding dimensions.

    Parameters
    ----------
    queries : mapping, optional
        Expression strings keyed by dimension name. May be supplied as
        keyword arguments instead (see ``**queries_kwargs``).
    parser : str, default: "pandas"
        Forwarded to ``pd.eval`` as ``parser``.
    engine : str, optional
        Forwarded to ``pd.eval`` as ``engine``.
    missing_dims : str, default: "raise"
        Forwarded to ``isel`` as ``missing_dims``.
    **queries_kwargs
        Keyword form of ``queries``.

    Returns
    -------
    Dataset
        Whatever ``self.isel`` returns for the evaluated indexers.

    Raises
    ------
    ValueError
        If the expression given for any dimension is not a string.
    """
    # Queries may arrive as a mapping or as keyword arguments; normalise
    # them through the shared helper first.
    queries = either_dict_or_kwargs(queries, queries_kwargs, "query")

    # Validate every expression before evaluating any of them, so a
    # non-string query is reported ahead of any evaluation error.
    for dim, expr in queries.items():
        if isinstance(expr, str):
            continue
        msg = f"expr for dim {dim} must be a string to be evaluated, {type(expr)} given"
        raise ValueError(msg)
    # TODO check missing dims here, or delegate to isel?

    # Evaluate each expression with this dataset as the name resolver.
    indexers = {}
    for dim_name, expression in queries.items():
        indexers[dim_name] = pd.eval(
            expression, resolvers=[self], parser=parser, engine=engine
        )

    # TODO any validation of indexers? Or just let isel try to handle it?

    # Positional selection with the evaluated indexers.
    return self.isel(indexers, missing_dims=missing_dims)
7013+
69837014

69847015
# Calls the ops helper on Dataset with array_only=False; presumably this
# attaches the operator and reduction methods to the class — confirm in ops.
ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)

xarray/tests/test_dataset.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5807,6 +5807,63 @@ def test_astype_attrs(self):
58075807
assert not data.astype(float, keep_attrs=False).attrs
58085808
assert not data.astype(float, keep_attrs=False).var1.attrs
58095809

5810+
def test_query_single_dim(self):
    """Check Dataset.query against the equivalent boolean-mask isel calls."""

    # Fixture: "a" and "b" are defined on x (length 10), "c" on y
    # (length 20), and "d" spans both dimensions.
    np.random.seed(42)
    a = np.arange(0, 10, 1)
    b = np.random.randint(0, 100, size=10)
    c = np.linspace(0, 1, 20)
    d = np.arange(0, 200).reshape(10, 20)
    data_vars = {
        "a": ("x", a),
        "b": ("x", b),
        "c": ("y", c),
        "d": (("x", "y"), d),
    }
    ds = Dataset(data_vars)

    # Boolean masks that the string queries below should reproduce.
    a_mask = a > 5
    b_mask = b > 50
    c_mask = c < 0.5
    a_and_b_mask = a_mask & b_mask

    # one dimension, one variable, query passed as a keyword
    assert_identical(ds.isel(x=a_mask), ds.query(x="a > 5"))

    # one dimension, one variable, query passed as a dict
    assert_identical(ds.isel({"x": a_mask}), ds.query({"x": "a > 5"}))

    # one dimension, a different variable on the same dimension
    assert_identical(ds.isel(x=b_mask), ds.query(x="b > 50"))

    # one dimension, variable on the other dimension
    assert_identical(ds.isel(y=c_mask), ds.query(y="c < .5"))

    # one dimension, two variables combined with the bitwise operator
    assert_identical(ds.isel(x=a_and_b_mask), ds.query(x="(a > 5) & (b > 50)"))

    # same combination spelled with "and", as the pandas parser allows
    assert_identical(ds.isel(x=a_and_b_mask), ds.query(x="(a > 5) and (b > 50)"))

    # two dimensions at once, queries passed as keywords
    assert_identical(
        ds.isel(x=a_mask, y=c_mask),
        ds.query(x="a > 5", y="c < .5"),
    )

    # two dimensions at once, queries passed as a dict
    assert_identical(
        ds.isel({"x": a_mask, "y": c_mask}),
        ds.query({"x": "a > 5", "y": "c < .5"}),
    )

    # TODO test error handling

    # TODO test dask data variables
5866+
58105867

58115868
# Py.test tests
58125869

0 commit comments

Comments
 (0)