Skip to content

Commit 5c6aebc

Browse files
griverat authored and max-sixty committed
Add head, tail and thin methods (#3278)
* Add head, tail and thin methods * Update api and whats-new * Fix pep8 issues * Fix typo * Tests for DataArray
1 parent 683aaf6 commit 5c6aebc

File tree

6 files changed

+188
-1
lines changed

6 files changed

+188
-1
lines changed

doc/api.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ Indexing
117117
Dataset.loc
118118
Dataset.isel
119119
Dataset.sel
120+
Dataset.head
121+
Dataset.tail
122+
Dataset.thin
120123
Dataset.squeeze
121124
Dataset.interp
122125
Dataset.interp_like
@@ -279,6 +282,9 @@ Indexing
279282
DataArray.loc
280283
DataArray.isel
281284
DataArray.sel
285+
DataArray.head
286+
DataArray.tail
287+
DataArray.thin
282288
DataArray.squeeze
283289
DataArray.interp
284290
DataArray.interp_like

doc/whats-new.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ New functions/methods
8787
Currently only :py:meth:`Dataset.plot.scatter` is implemented.
8888
By `Yohai Bar Sinai <https://github.com/yohai>`_ and `Deepak Cherian <https://github.com/dcherian>`_
8989

90+
- Added ``head``, ``tail`` and ``thin`` methods to ``Dataset`` and ``DataArray``. (:issue:`319`)
91+
By `Gerardo Rivera <https://github.com/dangomelon>`_.
92+
9093
Enhancements
9194
~~~~~~~~~~~~
9295

@@ -102,7 +105,7 @@ Enhancements
102105

103106
- Added the ability to initialize an empty or full DataArray
104107
with a single value. (:issue:`277`)
105-
By `Gerardo Rivera <http://github.com/dangomelon>`_.
108+
By `Gerardo Rivera <https://github.com/dangomelon>`_.
106109

107110
- :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used
108111
with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`.

xarray/core/dataarray.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,55 @@ def sel(
10401040
)
10411041
return self._from_temp_dataset(ds)
10421042

1043+
def head(
1044+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
1045+
) -> "DataArray":
1046+
"""Return a new DataArray whose data is given by the first `n`
1047+
values along the specified dimension(s).
1048+
1049+
See Also
1050+
--------
1051+
Dataset.head
1052+
DataArray.tail
1053+
DataArray.thin
1054+
"""
1055+
1056+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
1057+
ds = self._to_temp_dataset().head(indexers=indexers)
1058+
return self._from_temp_dataset(ds)
1059+
1060+
def tail(
1061+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
1062+
) -> "DataArray":
1063+
"""Return a new DataArray whose data is given by the last `n`
1064+
values along the specified dimension(s).
1065+
1066+
See Also
1067+
--------
1068+
Dataset.tail
1069+
DataArray.head
1070+
DataArray.thin
1071+
"""
1072+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
1073+
ds = self._to_temp_dataset().tail(indexers=indexers)
1074+
return self._from_temp_dataset(ds)
1075+
1076+
def thin(
1077+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
1078+
) -> "DataArray":
1079+
"""Return a new DataArray whose data is given by every `n`th value
1080+
along the specified dimension(s).
1081+
1082+
See Also
1083+
--------
1084+
Dataset.thin
1085+
DataArray.head
1086+
DataArray.tail
1087+
"""
1088+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
1089+
ds = self._to_temp_dataset().thin(indexers=indexers)
1090+
return self._from_temp_dataset(ds)
1091+
10431092
def broadcast_like(
10441093
self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None
10451094
) -> "DataArray":

xarray/core/dataset.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,6 +2008,90 @@ def sel(
20082008
result = self.isel(indexers=pos_indexers, drop=drop)
20092009
return result._overwrite_indexes(new_indexes)
20102010

2011+
def head(
2012+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
2013+
) -> "Dataset":
2014+
"""Returns a new dataset with the first `n` values of each array
2015+
for the specified dimension(s).
2016+
2017+
Parameters
2018+
----------
2019+
indexers : dict, optional
2020+
A dict with keys matching dimensions and integer values `n`.
2021+
One of indexers or indexers_kwargs must be provided.
2022+
**indexers_kwargs : {dim: n, ...}, optional
2023+
The keyword arguments form of ``indexers``.
2024+
One of indexers or indexers_kwargs must be provided.
2025+
2026+
2027+
See Also
2028+
--------
2029+
Dataset.tail
2030+
Dataset.thin
2031+
DataArray.head
2032+
"""
2033+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
2034+
indexers = {k: slice(val) for k, val in indexers.items()}
2035+
return self.isel(indexers)
2036+
2037+
def tail(
2038+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
2039+
) -> "Dataset":
2040+
"""Returns a new dataset with the last `n` values of each array
2041+
for the specified dimension(s).
2042+
2043+
Parameters
2044+
----------
2045+
indexers : dict, optional
2046+
A dict with keys matching dimensions and integer values `n`.
2047+
One of indexers or indexers_kwargs must be provided.
2048+
**indexers_kwargs : {dim: n, ...}, optional
2049+
The keyword arguments form of ``indexers``.
2050+
One of indexers or indexers_kwargs must be provided.
2051+
2052+
2053+
See Also
2054+
--------
2055+
Dataset.head
2056+
Dataset.thin
2057+
DataArray.tail
2058+
"""
2059+
2060+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
2061+
indexers = {
2062+
k: slice(-val, None) if val != 0 else slice(val)
2063+
for k, val in indexers.items()
2064+
}
2065+
return self.isel(indexers)
2066+
2067+
def thin(
2068+
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
2069+
) -> "Dataset":
2070+
"""Returns a new dataset with each array indexed along every `n`th
2071+
value for the specified dimension(s).
2072+
2073+
Parameters
2074+
----------
2075+
indexers : dict, optional
2076+
A dict with keys matching dimensions and integer values `n`.
2077+
One of indexers or indexers_kwargs must be provided.
2078+
**indexers_kwargs : {dim: n, ...}, optional
2079+
The keyword arguments form of ``indexers``.
2080+
One of indexers or indexers_kwargs must be provided.
2081+
2082+
2083+
See Also
2084+
--------
2085+
Dataset.head
2086+
Dataset.tail
2087+
DataArray.thin
2088+
"""
2089+
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
2090+
if 0 in indexers.values():
2091+
raise ValueError("step cannot be zero")
2092+
indexers = {k: slice(None, None, val) for k, val in indexers.items()}
2093+
return self.isel(indexers)
2094+
20112095
def broadcast_like(
20122096
self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None
20132097
) -> "Dataset":

xarray/tests/test_dataarray.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,19 @@ def test_isel_drop(self):
10021002
selected = data.isel(x=0, drop=False)
10031003
assert_identical(expected, selected)
10041004

1005+
def test_head(self):
1006+
assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5))
1007+
assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0))
1008+
1009+
def test_tail(self):
1010+
assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5))
1011+
assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0))
1012+
1013+
def test_thin(self):
1014+
assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5))
1015+
with raises_regex(ValueError, "cannot be zero"):
1016+
self.dv.thin(time=0)
1017+
10051018
def test_loc(self):
10061019
self.ds["x"] = ("x", np.array(list("abcdefghij")))
10071020
da = self.ds["foo"]

xarray/tests/test_dataset.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,38 @@ def test_isel_drop(self):
14111411
selected = data.isel(x=0, drop=False)
14121412
assert_identical(expected, selected)
14131413

1414+
def test_head(self):
1415+
data = create_test_data()
1416+
1417+
expected = data.isel(time=slice(5), dim2=slice(6))
1418+
actual = data.head(time=5, dim2=6)
1419+
assert_equal(expected, actual)
1420+
1421+
expected = data.isel(time=slice(0))
1422+
actual = data.head(time=0)
1423+
assert_equal(expected, actual)
1424+
1425+
def test_tail(self):
1426+
data = create_test_data()
1427+
1428+
expected = data.isel(time=slice(-5, None), dim2=slice(-6, None))
1429+
actual = data.tail(time=5, dim2=6)
1430+
assert_equal(expected, actual)
1431+
1432+
expected = data.isel(dim1=slice(0))
1433+
actual = data.tail(dim1=0)
1434+
assert_equal(expected, actual)
1435+
1436+
def test_thin(self):
1437+
data = create_test_data()
1438+
1439+
expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6))
1440+
actual = data.thin(time=5, dim2=6)
1441+
assert_equal(expected, actual)
1442+
1443+
with raises_regex(ValueError, "cannot be zero"):
1444+
data.thin(time=0)
1445+
14141446
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
14151447
def test_sel_fancy(self):
14161448
data = create_test_data()

0 commit comments

Comments
 (0)