From 588b6a114839db872c91d09ed198ccc718d91248 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Mar 2018 18:52:37 +0100 Subject: [PATCH 1/5] Fix an overflow bug in decode_cf_datetime --- xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 1bb4e31ae7e..3fd6f237bc9 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -166,7 +166,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): # Cast input dates to integers of nanoseconds because `pd.to_datetime` # works much faster when dealing with integers - flat_num_dates_ns_int = (flat_num_dates * + flat_num_dates_ns_int = (flat_num_dates.astype(np.float) * _NS_PER_TIME_DELTA[delta]).astype(np.int64) dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b85f92ece66..3c6fe71b872 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -112,6 +112,13 @@ def test_decode_cf_datetime_non_standard_units(self): actual = coding.times.decode_cf_datetime(np.arange(100), units) assert_array_equal(actual, expected) + def test_decode_cf_datetime_int32(self): + # regression test for gh#2002 + units = 'seconds since 1981-01-01' + expected = '2006-01-01T12:00:00.000000000' + actual = coding.times.decode_cf_datetime(np.int32(788961600), units) + assert_array_equal(str(actual), expected) + @requires_netcdftime def test_decode_cf_datetime_non_iso_strings(self): # datetime strings that are _almost_ ISO compliant but not quite, From 63bce5265ecec0f0971c6c6bd73f6d615ccf86be Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Mar 2018 19:06:29 +0100 Subject: [PATCH 2/5] Better test --- xarray/tests/test_coding_times.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_coding_times.py 
b/xarray/tests/test_coding_times.py index 3c6fe71b872..faf9472c7c4 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -115,9 +115,9 @@ def test_decode_cf_datetime_non_standard_units(self): def test_decode_cf_datetime_int32(self): # regression test for gh#2002 units = 'seconds since 1981-01-01' - expected = '2006-01-01T12:00:00.000000000' + expected = np.datetime64('2006-01-01T12') actual = coding.times.decode_cf_datetime(np.int32(788961600), units) - assert_array_equal(str(actual), expected) + assert actual == expected @requires_netcdftime def test_decode_cf_datetime_non_iso_strings(self): From 2df0621b5499d5f0525f212d636aeb092da8acbf Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Mar 2018 20:37:40 +0100 Subject: [PATCH 3/5] Other solution --- doc/whats-new.rst | 3 +++ xarray/coding/times.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b28beb9e3b2..87b019b54aa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ Bug fixes - Fixed labeled indexing with slice bounds given by xarray objects with datetime64 or timedelta64 dtypes (:issue:`1240`). By `Stephan Hoyer <https://github.com/shoyer>`_. +- Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed + correctly (:issue:`2002`). + By `Fabien Maussion <https://github.com/fmaussion>`_. .. 
_whats-new.0.10.2: diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3fd6f237bc9..dac08d4db82 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -166,8 +166,9 @@ def decode_cf_datetime(num_dates, units, calendar=None): # Cast input dates to integers of nanoseconds because `pd.to_datetime` # works much faster when dealing with integers - flat_num_dates_ns_int = (flat_num_dates.astype(np.float) * - _NS_PER_TIME_DELTA[delta]).astype(np.int64) + # make _NS_PER_TIME_DELTA an array to ensure type upcasting + flat_num_dates_ns_int = (flat_num_dates * + [_NS_PER_TIME_DELTA[delta]]).astype(np.int64) dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + ref_date).values From 1f200f6e865a2ccb24e14dd0036463a8b952a3db Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Mar 2018 21:27:56 +0100 Subject: [PATCH 4/5] Better test --- xarray/tests/test_coding_times.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index faf9472c7c4..ab33329b51a 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -49,6 +49,7 @@ def test_cf_datetime(self): ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), (0, 'milliseconds since 2000-01-01T00:00:00'), (0, 'microseconds since 2000-01-01T00:00:00'), + (np.int32(788961600), 'seconds since 1981-01-01'), # GH2002 ]: for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: expected = _ensure_naive_tz( @@ -112,13 +113,6 @@ def test_decode_cf_datetime_non_standard_units(self): actual = coding.times.decode_cf_datetime(np.arange(100), units) assert_array_equal(actual, expected) - def test_decode_cf_datetime_int32(self): - # regression test for gh#2002 - units = 'seconds since 1981-01-01' - expected = np.datetime64('2006-01-01T12') - actual = coding.times.decode_cf_datetime(np.int32(788961600), units) - assert actual == expected - @requires_netcdftime def 
test_decode_cf_datetime_non_iso_strings(self): # datetime strings that are _almost_ ISO compliant but not quite, From 8283967ec13d5d7780806c00ee49d14aeb756973 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 24 Mar 2018 22:11:32 +0100 Subject: [PATCH 5/5] Back to previous because of appveyor --- xarray/coding/times.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index dac08d4db82..8a1e9f82c6c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -167,8 +167,8 @@ def decode_cf_datetime(num_dates, units, calendar=None): # Cast input dates to integers of nanoseconds because `pd.to_datetime` # works much faster when dealing with integers # make _NS_PER_TIME_DELTA an array to ensure type upcasting - flat_num_dates_ns_int = (flat_num_dates * - [_NS_PER_TIME_DELTA[delta]]).astype(np.int64) + flat_num_dates_ns_int = (flat_num_dates.astype(np.float64) * + _NS_PER_TIME_DELTA[delta]).astype(np.int64) dates = (pd.to_timedelta(flat_num_dates_ns_int, 'ns') + ref_date).values