diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index f034e0e223e6b..6936e40da6fbe 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -163,6 +163,7 @@ Bug Fixes - Bug in ``DatetimeIndex`` when using ``time`` object as key (:issue:`8667`) - Bug in ``merge`` where ``how='left'`` and ``sort=False`` would not preserve left frame order (:issue:`7331`) - Bug in ``MultiIndex.reindex`` where reindexing at level would not reorder labels (:issue:`4088`) +- Bug in certain operations with dateutil timezones, manifesting with dateutil 2.3 (:issue:`8639`) - Fix negative step support for label-based slices (:issue:`8753`) diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 500e19d36fff6..d568a75f6874d 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -371,31 +371,31 @@ def test_range_tz_pytz(self): self.assertEqual(dr.tz.zone, tz.zone) self.assertEqual(dr[0], start) self.assertEqual(dr[2], end) - + def test_range_tz_dst_straddle_pytz(self): - + tm._skip_if_no_pytz() from pytz import timezone tz = timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), + dates = [(tz.localize(datetime(2014, 3, 6)), tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), + (tz.localize(datetime(2013, 11, 1)), tz.localize(datetime(2013, 11, 6)))] for (start, end) in dates: dr = date_range(start, end, freq='D') self.assertEqual(dr[0], start) self.assertEqual(dr[-1], end) self.assertEqual(np.all(dr.hour==0), True) - + dr = date_range(start, end, freq='D', tz='US/Eastern') self.assertEqual(dr[0], start) self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour==0), True) - + self.assertEqual(np.all(dr.hour==0), True) + dr = date_range(start.replace(tzinfo=None), end.replace(tzinfo=None), freq='D', tz='US/Eastern') self.assertEqual(dr[0], start) self.assertEqual(dr[-1], end) - 
self.assertEqual(np.all(dr.hour==0), True) + self.assertEqual(np.all(dr.hour==0), True) def test_range_tz_dateutil(self): # GH 2906 @@ -441,7 +441,7 @@ def test_month_range_union_tz_pytz(self): def test_month_range_union_tz_dateutil(self): _skip_if_windows_python_3() tm._skip_if_no_dateutil() - from dateutil.tz import gettz as timezone + from dateutil.zoneinfo import gettz as timezone tz = timezone('US/Eastern') early_start = datetime(2011, 1, 1) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 7a428fd629125..9b8200e266e5a 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -419,7 +419,7 @@ def test_timestamp_to_datetime_explicit_dateutil(self): tm._skip_if_no_dateutil() import dateutil rng = date_range('20090415', '20090519', - tz=dateutil.tz.gettz('US/Eastern')) + tz=dateutil.zoneinfo.gettz('US/Eastern')) stamp = rng[0] dtval = stamp.to_pydatetime() @@ -1797,7 +1797,7 @@ def test_append_concat_tz_explicit_pytz(self): def test_append_concat_tz_dateutil(self): # GH 2938 tm._skip_if_no_dateutil() - from dateutil.tz import gettz as timezone + from dateutil.zoneinfo import gettz as timezone rng = date_range('5/8/2012 1:45', periods=10, freq='5T', tz='dateutil/US/Eastern') diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 9fbdb714d8cfa..752d12743a5d3 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -443,7 +443,7 @@ def test_ambiguous_infer(self): localized_old = di.tz_localize(tz, infer_dst=True) self.assert_numpy_array_equal(dr, localized_old) self.assert_numpy_array_equal(dr, DatetimeIndex(times, tz=tz, ambiguous='infer')) - + # When there is no dst transition, nothing special happens dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=datetools.Hour()) @@ -463,31 +463,31 @@ def test_ambiguous_flags(self): times = ['11/06/2011 00:00', '11/06/2011 01:00', 
'11/06/2011 01:00', '11/06/2011 02:00', '11/06/2011 03:00'] - + # Test tz_localize di = DatetimeIndex(times) is_dst = [1, 1, 0, 0, 0] localized = di.tz_localize(tz, ambiguous=is_dst) self.assert_numpy_array_equal(dr, localized) self.assert_numpy_array_equal(dr, DatetimeIndex(times, tz=tz, ambiguous=is_dst)) - + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) self.assert_numpy_array_equal(dr, localized) - + localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype('bool')) self.assert_numpy_array_equal(dr, localized) - + # Test constructor localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) self.assert_numpy_array_equal(dr, localized) - + # Test duplicate times where infer_dst fails times += times di = DatetimeIndex(times) - + # When the sizes are incompatible, make sure error is raised self.assertRaises(Exception, di.tz_localize, tz, ambiguous=is_dst) - + # When sizes are compatible and there are repeats ('infer' won't work) is_dst = np.hstack((is_dst, is_dst)) localized = di.tz_localize(tz, ambiguous=is_dst) @@ -501,7 +501,7 @@ def test_ambiguous_flags(self): localized = dr.tz_localize(tz) localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) self.assert_numpy_array_equal(localized, localized_is_dst) - + def test_ambiguous_nat(self): tz = self.tz('US/Eastern') times = ['11/06/2011 00:00', '11/06/2011 01:00', @@ -509,7 +509,7 @@ def test_ambiguous_nat(self): '11/06/2011 03:00'] di = DatetimeIndex(times) localized = di.tz_localize(tz, ambiguous='NaT') - + times = ['11/06/2011 00:00', np.NaN, np.NaN, '11/06/2011 02:00', '11/06/2011 03:00'] diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index ad0ef67b5aca2..679fd2992855c 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1,5 +1,5 @@ import nose - +from distutils.version import LooseVersion import numpy as np from pandas import tslib @@ -137,13 +137,24 @@ def test_constructor_with_stringoffset(self): 
self.assertEqual(result, eval(repr(result))) def test_repr(self): + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + dates = ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001'] - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/America/Los_Angeles'] + + # dateutil zone change (only matters for repr) + import dateutil + if dateutil.__version__ >= LooseVersion('2.3'): + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] + else: + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/America/Los_Angeles'] + freqs = ['D', 'M', 'S', 'N'] for date in dates: for tz in timezones: for freq in freqs: + # avoid to match with timezone name freq_repr = "'{0}'".format(freq) if tz.startswith('dateutil'): @@ -306,10 +317,10 @@ def test_now(self): ts_from_string = Timestamp('now') ts_from_method = Timestamp.now() ts_datetime = datetime.datetime.now() - + ts_from_string_tz = Timestamp('now', tz='US/Eastern') ts_from_method_tz = Timestamp.now(tz='US/Eastern') - + # Check that the delta between the times is less than 1s (arbitrarily small) delta = Timedelta(seconds=1) self.assertTrue((ts_from_method - ts_from_string) < delta) @@ -321,10 +332,10 @@ def test_today(self): ts_from_string = Timestamp('today') ts_from_method = Timestamp.today() ts_datetime = datetime.datetime.today() - + ts_from_string_tz = Timestamp('today', tz='US/Eastern') ts_from_method_tz = Timestamp.today(tz='US/Eastern') - + # Check that the delta between the times is less than 1s (arbitrarily small) delta = Timedelta(seconds=1) self.assertTrue((ts_from_method - ts_from_string) < delta) @@ -737,7 +748,7 @@ def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], [tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, tslib.D_RESO, tslib.H_RESO, tslib.T_RESO,tslib.S_RESO, tslib.MS_RESO, tslib.US_RESO]): - for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern']: idx = 
date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) result = tslib.resolution(idx.asi8, idx.tz) self.assertEqual(result, expected) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 1976eee96296c..e3e18b912132d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -133,8 +133,8 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): dt = dt + tz.utcoffset(dt) result[i] = dt else: - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + for i in range(n): value = arr[i] @@ -223,10 +223,10 @@ class Timestamp(_Timestamp): @classmethod def now(cls, tz=None): - """ + """ Return the current time in the local timezone. Equivalent to datetime.now([tz]) - + Parameters ---------- tz : string / timezone object, default None @@ -242,7 +242,7 @@ class Timestamp(_Timestamp): Return the current time in the local timezone. This differs from datetime.today() in that it can be localized to a passed timezone. - + Parameters ---------- tz : string / timezone object, default None @@ -1045,12 +1045,12 @@ cdef convert_to_tsobject(object ts, object tz, object unit): if util.is_string_object(ts): if ts in _nat_strings: ts = NaT - elif ts == 'now': - # Issue 9000, we short-circuit rather than going + elif ts == 'now': + # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc ts = Timestamp.now(tz) - elif ts == 'today': - # Issue 9000, we short-circuit rather than going + elif ts == 'today': + # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime ts = Timestamp.today(tz) else: @@ -1174,8 +1174,8 @@ cdef inline void _localize_tso(_TSObject obj, object tz): obj.tzinfo = tz else: # Adjust datetime64 timestamp, recompute datetimestruct - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 @@ -2566,8 +2566,8 @@ def 
tz_convert(ndarray[int64_t] vals, object tz1, object tz2): * 1000000000) utc_dates[i] = v - delta else: - deltas = _get_deltas(tz1) - trans = _get_transitions(tz1) + trans, deltas, typ = _get_dst_info(tz1) + trans_len = len(trans) pos = trans.searchsorted(vals[0]) - 1 if pos < 0: @@ -2598,9 +2598,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return result # Convert UTC to other timezone - trans = _get_transitions(tz2) + trans, deltas, typ = _get_dst_info(tz2) trans_len = len(trans) - deltas = _get_deltas(tz2) + pos = trans.searchsorted(utc_dates[0]) - 1 if pos < 0: raise ValueError('First time before start of DST info') @@ -2639,8 +2639,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): delta = int(total_seconds(_get_utcoffset(tz1, dt))) * 1000000000 utc_date = val - delta elif _get_zone(tz1) != 'UTC': - deltas = _get_deltas(tz1) - trans = _get_transitions(tz1) + trans, deltas, typ = _get_dst_info(tz1) pos = trans.searchsorted(val, side='right') - 1 if pos < 0: raise ValueError('First time before start of DST info') @@ -2658,8 +2657,8 @@ def tz_convert_single(int64_t val, object tz1, object tz2): delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 return utc_date + delta # Convert UTC to other timezone - trans = _get_transitions(tz2) - deltas = _get_deltas(tz2) + trans, deltas, typ = _get_dst_info(tz2) + pos = trans.searchsorted(utc_date, side='right') - 1 if pos < 0: raise ValueError('First time before start of DST info') @@ -2668,8 +2667,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): return utc_date + offset # Timezone data caches, key is the pytz string or dateutil file name. 
-trans_cache = {} -utc_offset_cache = {} +dst_cache = {} cdef inline bint _treat_tz_as_pytz(object tz): return hasattr(tz, '_utc_transition_times') and hasattr(tz, '_transition_info') @@ -2708,40 +2706,67 @@ cdef inline object _tz_cache_key(object tz): return None -cdef object _get_transitions(object tz): +cdef object _get_dst_info(object tz): """ - Get UTC times of DST transitions + return a tuple of : + (UTC times of DST transitions, + UTC offsets in nanoseconds corresponding to DST transitions, + string of type of transitions) + """ cache_key = _tz_cache_key(tz) if cache_key is None: - return np.array([NPY_NAT + 1], dtype=np.int64) + num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + return (np.array([NPY_NAT + 1], dtype=np.int64), + np.array([num], dtype=np.int64), + None) - if cache_key not in trans_cache: + if cache_key not in dst_cache: if _treat_tz_as_pytz(tz): - arr = np.array(tz._utc_transition_times, dtype='M8[ns]') - arr = arr.view('i8') + trans = np.array(tz._utc_transition_times, dtype='M8[ns]') + trans = trans.view('i8') try: if tz._utc_transition_times[0].year == 1: - arr[0] = NPY_NAT + 1 + trans[0] = NPY_NAT + 1 except Exception: pass + deltas = _unbox_utcoffsets(tz._transition_info) + typ = 'pytz' + elif _treat_tz_as_dateutil(tz): if len(tz._trans_list): # get utc trans times trans_list = _get_utc_trans_times_from_dateutil_tz(tz) - arr = np.hstack([np.array([0], dtype='M8[s]'), # place holder for first item - np.array(trans_list, dtype='M8[s]')]).astype('M8[ns]') # all trans listed - arr = arr.view('i8') - arr[0] = NPY_NAT + 1 + trans = np.hstack([np.array([0], dtype='M8[s]'), # place holder for first item + np.array(trans_list, dtype='M8[s]')]).astype('M8[ns]') # all trans listed + trans = trans.view('i8') + trans[0] = NPY_NAT + 1 + + # deltas + deltas = np.array([v.offset for v in (tz._ttinfo_before,) + tz._trans_idx], dtype='i8') # + (tz._ttinfo_std,) + deltas *= 1000000000 + typ = 'dateutil' + elif _is_fixed_offset(tz): - arr = 
np.array([NPY_NAT + 1], dtype=np.int64) + trans = np.array([NPY_NAT + 1], dtype=np.int64) + deltas = np.array([tz._ttinfo_std.offset], dtype='i8') * 1000000000 + typ = 'fixed' else: - arr = np.array([], dtype='M8[ns]') + trans = np.array([], dtype='M8[ns]') + deltas = np.array([], dtype='i8') + typ = None + + else: - arr = np.array([NPY_NAT + 1], dtype=np.int64) - trans_cache[cache_key] = arr - return trans_cache[cache_key] + # static tzinfo + trans = np.array([NPY_NAT + 1], dtype=np.int64) + num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + deltas = np.array([num], dtype=np.int64) + typ = 'static' + + dst_cache[cache_key] = (trans, deltas, typ) + return dst_cache[cache_key] cdef object _get_utc_trans_times_from_dateutil_tz(object tz): ''' @@ -2756,35 +2781,6 @@ cdef object _get_utc_trans_times_from_dateutil_tz(object tz): new_trans[i] = trans - last_std_offset return new_trans - -cdef object _get_deltas(object tz): - """ - Get UTC offsets in microseconds corresponding to DST transitions - """ - cache_key = _tz_cache_key(tz) - if cache_key is None: - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 - return np.array([num], dtype=np.int64) - - if cache_key not in utc_offset_cache: - if _treat_tz_as_pytz(tz): - utc_offset_cache[cache_key] = _unbox_utcoffsets(tz._transition_info) - elif _treat_tz_as_dateutil(tz): - if len(tz._trans_list): - arr = np.array([v.offset for v in (tz._ttinfo_before,) + tz._trans_idx], dtype='i8') # + (tz._ttinfo_std,) - arr *= 1000000000 - utc_offset_cache[cache_key] = arr - elif _is_fixed_offset(tz): - utc_offset_cache[cache_key] = np.array([tz._ttinfo_std.offset], dtype='i8') * 1000000000 - else: - utc_offset_cache[cache_key] = np.array([], dtype='i8') - else: - # static tzinfo - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 - utc_offset_cache[cache_key] = np.array([num], dtype=np.int64) - - return utc_offset_cache[cache_key] - def tot_seconds(td): return total_seconds(td) @@ -2852,8 
+2848,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None): if len(ambiguous) != len(vals): raise ValueError("Length of ambiguous bool-array must be the same size as vals") - trans = _get_transitions(tz) # transition dates - deltas = _get_deltas(tz) # utc offsets + trans, deltas, typ = _get_dst_info(tz) tdata = trans.data ntrans = len(trans) @@ -3464,15 +3459,15 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): result[i] = _normalized_stamp(&dts) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: _pos = _pos.astype(np.int64) pos = _pos # statictzinfo - if not hasattr(tz, '_transition_info'): + if typ not in ['pytz','dateutil']: for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -3521,8 +3516,8 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): if dt.hour > 0: return False else: - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + for i in range(n): # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(stamps[i]) - 1 @@ -3609,15 +3604,15 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: _pos = _pos.astype(np.int64) pos = _pos # statictzinfo - if not hasattr(tz, '_transition_info'): + if typ not in ['pytz','dateutil']: for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -4111,15 +4106,15 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): reso = curr_reso else: # Adjust datetime64 timestamp, recompute 
datetimestruct - trans = _get_transitions(tz) - deltas = _get_deltas(tz) + trans, deltas, typ = _get_dst_info(tz) + _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: _pos = _pos.astype(np.int64) pos = _pos # statictzinfo - if not hasattr(tz, '_transition_info'): + if typ not in ['pytz','dateutil']: for i in range(n): if stamps[i] == NPY_NAT: continue