Skip to content

Commit 742d000

Browse files
authored
#1621 optional decode timedelta (#4071)
* add decode_timedelta kwarg in decode_cf and open_* functions and test. * Fix style issue * Add chang author reference * removed check decode_timedelta in open_dataset * fix docstring indentation * fix: force dtype in test decode_timedelta
1 parent 2542a63 commit 742d000

File tree

5 files changed

+81
-5
lines changed

5 files changed

+81
-5
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ New Features
7474
where the result of a computation could not be inferred automatically.
7575
By `Deepak Cherian <https://github.com/dcherian>`_
7676

77+
- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`,
78+
(:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_zarr`,
79+
:py:func:`xarray.decode_cf`) that allows disabling/enabling the decoding of timedeltas
80+
independently of time decoding (:issue:`1621`)
81+
By `Aureliana Barghini <https://github.com/aurghs>`_
82+
7783
Bug fixes
7884
~~~~~~~~~
7985
- Support dark mode in VS code (:issue:`4024`)

xarray/backends/api.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ def open_dataset(
303303
drop_variables=None,
304304
backend_kwargs=None,
305305
use_cftime=None,
306+
decode_timedelta=None,
306307
):
307308
"""Open and decode a dataset from a file or file-like object.
308309
@@ -383,6 +384,11 @@ def open_dataset(
383384
represented using ``np.datetime64[ns]`` objects. If False, always
384385
decode times to ``np.datetime64[ns]`` objects; if this is not possible
385386
raise an error.
387+
decode_timedelta : bool, optional
388+
If True, decode variables and coordinates with time units in
389+
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
390+
into timedelta objects. If False, leave them encoded as numbers.
391+
If None (default), assume the same value of decode_times.
386392
387393
Returns
388394
-------
@@ -435,6 +441,7 @@ def open_dataset(
435441
decode_times = False
436442
concat_characters = False
437443
decode_coords = False
444+
decode_timedelta = False
438445

439446
if cache is None:
440447
cache = chunks is None
@@ -451,6 +458,7 @@ def maybe_decode_store(store, lock=False):
451458
decode_coords=decode_coords,
452459
drop_variables=drop_variables,
453460
use_cftime=use_cftime,
461+
decode_timedelta=decode_timedelta,
454462
)
455463

456464
_protect_dataset_variables_inplace(ds, cache)
@@ -477,6 +485,7 @@ def maybe_decode_store(store, lock=False):
477485
chunks,
478486
drop_variables,
479487
use_cftime,
488+
decode_timedelta,
480489
)
481490
name_prefix = "open_dataset-%s" % token
482491
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
@@ -561,6 +570,7 @@ def open_dataarray(
561570
drop_variables=None,
562571
backend_kwargs=None,
563572
use_cftime=None,
573+
decode_timedelta=None,
564574
):
565575
"""Open a DataArray from a file or file-like object containing a single
566576
data variable.
@@ -640,6 +650,11 @@ def open_dataarray(
640650
represented using ``np.datetime64[ns]`` objects. If False, always
641651
decode times to ``np.datetime64[ns]`` objects; if this is not possible
642652
raise an error.
653+
decode_timedelta : bool, optional
654+
If True, decode variables and coordinates with time units in
655+
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
656+
into timedelta objects. If False, leave them encoded as numbers.
657+
If None (default), assume the same value of decode_times.
643658
644659
Notes
645660
-----
@@ -671,6 +686,7 @@ def open_dataarray(
671686
drop_variables=drop_variables,
672687
backend_kwargs=backend_kwargs,
673688
use_cftime=use_cftime,
689+
decode_timedelta=decode_timedelta,
674690
)
675691

676692
if len(dataset.data_vars) != 1:

xarray/backends/zarr.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ def open_zarr(
503503
drop_variables=None,
504504
consolidated=False,
505505
overwrite_encoded_chunks=False,
506+
decode_timedelta=None,
506507
**kwargs,
507508
):
508509
"""Load and decode a dataset from a Zarr store.
@@ -562,6 +563,11 @@ def open_zarr(
562563
consolidated : bool, optional
563564
Whether to open the store using zarr's consolidated metadata
564565
capability. Only works for stores that have already been consolidated.
566+
decode_timedelta : bool, optional
567+
If True, decode variables and coordinates with time units in
568+
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
569+
into timedelta objects. If False, leave them encoded as numbers.
570+
If None (default), assume the same value of decode_times.
565571
566572
Returns
567573
-------
@@ -612,6 +618,7 @@ def open_zarr(
612618
decode_times = False
613619
concat_characters = False
614620
decode_coords = False
621+
decode_timedelta = False
615622

616623
def maybe_decode_store(store, lock=False):
617624
ds = conventions.decode_cf(
@@ -621,6 +628,7 @@ def maybe_decode_store(store, lock=False):
621628
concat_characters=concat_characters,
622629
decode_coords=decode_coords,
623630
drop_variables=drop_variables,
631+
decode_timedelta=decode_timedelta,
624632
)
625633

626634
# TODO: this is where we would apply caching

xarray/conventions.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ def decode_cf_variable(
266266
decode_endianness=True,
267267
stack_char_dim=True,
268268
use_cftime=None,
269+
decode_timedelta=None,
269270
):
270271
"""
271272
Decodes a variable which may hold CF encoded information.
@@ -315,6 +316,9 @@ def decode_cf_variable(
315316
var = as_variable(var)
316317
original_dtype = var.dtype
317318

319+
if decode_timedelta is None:
320+
decode_timedelta = decode_times
321+
318322
if concat_characters:
319323
if stack_char_dim:
320324
var = strings.CharacterArrayCoder().decode(var, name=name)
@@ -328,12 +332,10 @@ def decode_cf_variable(
328332
]:
329333
var = coder.decode(var, name=name)
330334

335+
if decode_timedelta:
336+
var = times.CFTimedeltaCoder().decode(var, name=name)
331337
if decode_times:
332-
for coder in [
333-
times.CFTimedeltaCoder(),
334-
times.CFDatetimeCoder(use_cftime=use_cftime),
335-
]:
336-
var = coder.decode(var, name=name)
338+
var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)
337339

338340
dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)
339341
# TODO(shoyer): convert everything below to use coders
@@ -442,6 +444,7 @@ def decode_cf_variables(
442444
decode_coords=True,
443445
drop_variables=None,
444446
use_cftime=None,
447+
decode_timedelta=None,
445448
):
446449
"""
447450
Decode several CF encoded variables.
@@ -492,6 +495,7 @@ def stackable(dim):
492495
decode_times=decode_times,
493496
stack_char_dim=stack_char_dim,
494497
use_cftime=use_cftime,
498+
decode_timedelta=decode_timedelta,
495499
)
496500
if decode_coords:
497501
var_attrs = new_vars[k].attrs
@@ -518,6 +522,7 @@ def decode_cf(
518522
decode_coords=True,
519523
drop_variables=None,
520524
use_cftime=None,
525+
decode_timedelta=None,
521526
):
522527
"""Decode the given Dataset or Datastore according to CF conventions into
523528
a new Dataset.
@@ -552,6 +557,11 @@ def decode_cf(
552557
represented using ``np.datetime64[ns]`` objects. If False, always
553558
decode times to ``np.datetime64[ns]`` objects; if this is not possible
554559
raise an error.
560+
decode_timedelta : bool, optional
561+
If True, decode variables and coordinates with time units in
562+
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
563+
into timedelta objects. If False, leave them encoded as numbers.
564+
If None (default), assume the same value of decode_times.
555565
556566
Returns
557567
-------
@@ -583,6 +593,7 @@ def decode_cf(
583593
decode_coords,
584594
drop_variables=drop_variables,
585595
use_cftime=use_cftime,
596+
decode_timedelta=decode_timedelta,
586597
)
587598
ds = Dataset(vars, attrs=attrs)
588599
ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars))

xarray/tests/test_conventions.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,41 @@ def test_decode_dask_times(self):
311311
conventions.decode_cf(original).chunk(),
312312
)
313313

314+
def test_decode_cf_time_kwargs(self):
315+
ds = Dataset.from_dict(
316+
{
317+
"coords": {
318+
"timedelta": {
319+
"data": np.array([1, 2, 3], dtype="int64"),
320+
"dims": "timedelta",
321+
"attrs": {"units": "days"},
322+
},
323+
"time": {
324+
"data": np.array([1, 2, 3], dtype="int64"),
325+
"dims": "time",
326+
"attrs": {"units": "days since 2000-01-01"},
327+
},
328+
},
329+
"dims": {"time": 3, "timedelta": 3},
330+
"data_vars": {
331+
"a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))},
332+
},
333+
}
334+
)
335+
336+
dsc = conventions.decode_cf(ds)
337+
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
338+
assert dsc.time.dtype == np.dtype("M8[ns]")
339+
dsc = conventions.decode_cf(ds, decode_times=False)
340+
assert dsc.timedelta.dtype == np.dtype("int64")
341+
assert dsc.time.dtype == np.dtype("int64")
342+
dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False)
343+
assert dsc.timedelta.dtype == np.dtype("int64")
344+
assert dsc.time.dtype == np.dtype("M8[ns]")
345+
dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True)
346+
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
347+
assert dsc.time.dtype == np.dtype("int64")
348+
314349

315350
class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore):
316351
def encode_variable(self, var):

0 commit comments

Comments
 (0)