Skip to content

ENH: Add an errors flag to tz_localize #13058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@ New features



.. _whatsnew_0182.enhancements.other:


Other enhancements
^^^^^^^^^^^^^^^^^^


- The ``tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword,
which controls how nonexistent timestamps are handled: ``errors='coerce'`` silently replaces them with ``NaT``,
while the default, ``errors='raise'``, still raises a ``NonExistentTimeError`` (:issue:`13057`)



Expand Down
14 changes: 12 additions & 2 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,7 @@ def tz_convert(self, tz):

@deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
mapping={True: 'infer', False: 'raise'})
def tz_localize(self, tz, ambiguous='raise'):
def tz_localize(self, tz, ambiguous='raise', errors='raise'):
"""
Localize tz-naive DatetimeIndex to given time zone (using
pytz/dateutil), or remove timezone from tz-aware DatetimeIndex
Expand All @@ -1832,6 +1832,15 @@ def tz_localize(self, tz, ambiguous='raise'):
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
times
errors : 'raise', 'coerce', default 'raise'
- 'raise' will raise a NonExistentTimeError if a timestamp is not
valid in the specified timezone (e.g. due to a transition from
or to DST time)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add this same option to ``Timestamp.tz_localize``.

- 'coerce' will return NaT if the timestamp can not be converted
into the specified timezone

.. versionadded:: 0.18.2

infer_dst : boolean, default False (DEPRECATED)
Attempt to infer fall dst-transition hours based on order

Expand All @@ -1854,7 +1863,8 @@ def tz_localize(self, tz, ambiguous='raise'):
# Convert to UTC

new_dates = tslib.tz_localize_to_utc(self.asi8, tz,
ambiguous=ambiguous)
ambiguous=ambiguous,
errors=errors)
new_dates = new_dates.view(_NS_DTYPE)
return self._shallow_copy(new_dates, tz=tz)

Expand Down
17 changes: 17 additions & 0 deletions pandas/tseries/tests/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,23 @@ def test_ambiguous_nat(self):
di_test = DatetimeIndex(times, tz='US/Eastern')
self.assert_numpy_array_equal(di_test, localized)

def test_nonexistent_raise_coerce(self):
    """Check ``errors`` handling of ``DatetimeIndex.tz_localize``.

    A naive index containing a wall time that does not exist in the
    target zone must raise ``NonExistentTimeError`` both by default and
    with ``errors='raise'``; with ``errors='coerce'`` that entry must
    come back as ``NaT`` while the other entries are localized.
    """
    # See issue 13057
    from pytz.exceptions import NonExistentTimeError

    tz = 'US/Eastern'
    naive = DatetimeIndex(['2015-03-08 01:00', '2015-03-08 02:00',
                           '2015-03-08 03:00'])

    # Default behaviour: raise on the nonexistent entry.
    with self.assertRaises(NonExistentTimeError):
        naive.tz_localize(tz=tz)

    # Spelling out errors='raise' must behave the same as the default.
    with self.assertRaises(NonExistentTimeError):
        naive.tz_localize(tz=tz, errors='raise')

    # errors='coerce': the middle entry is expected to become NaT.
    coerced = naive.tz_localize(tz=tz, errors='coerce')

    # Build the expected index from explicit UTC offsets, then express
    # it in the target zone.
    expected_strings = ['2015-03-08 01:00-05:00', 'NaT',
                        '2015-03-08 03:00-04:00']
    as_utc = DatetimeIndex(expected_strings).tz_localize('UTC')
    expected = as_utc.tz_convert('US/Eastern')
    tm.assert_index_equal(coerced, expected)

# test utility methods
def test_infer_tz(self):
eastern = self.tz('US/Eastern')
Expand Down
23 changes: 23 additions & 0 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,29 @@ def test_tz_localize_ambiguous(self):
'tz_localize to localize'):
Timestamp('2011-01-01').tz_convert('Asia/Tokyo')

def test_tz_localize_nonexistent(self):
    """Check ``errors`` handling of ``Timestamp.tz_localize``.

    Each (wall time, zone) pair below is expected to be a nonexistent
    local time: localizing must raise ``NonExistentTimeError`` by
    default and with ``errors='raise'``, and must return ``NaT`` with
    ``errors='coerce'``.
    """
    # See issue 13057
    from pytz.exceptions import NonExistentTimeError

    cases = [('2015-03-08 02:00', 'US/Eastern'),
             ('2015-03-08 02:30', 'US/Pacific'),
             ('2015-03-29 02:00', 'Europe/Paris'),
             ('2015-03-29 02:30', 'Europe/Belgrade')]

    for wall_time, zone in cases:
        naive = Timestamp(wall_time)

        # Implicit default.
        with self.assertRaises(NonExistentTimeError):
            naive.tz_localize(zone)

        # Explicit errors='raise'.
        with self.assertRaises(NonExistentTimeError):
            naive.tz_localize(zone, errors='raise')

        # Coercion must hand back the NaT singleton itself.
        coerced = naive.tz_localize(zone, errors='coerce')
        self.assertIs(coerced, pd.NaT)

def test_tz_localize_errors_ambiguous(self):
    """``errors='coerce'`` must not suppress ``AmbiguousTimeError``.

    Localizing this timestamp to US/Pacific is expected to raise
    ``AmbiguousTimeError`` even when ``errors='coerce'`` is passed.
    """
    # See issue 13057
    from pytz.exceptions import AmbiguousTimeError

    naive = pd.Timestamp('2015-11-1 01:00')
    with self.assertRaises(AmbiguousTimeError):
        naive.tz_localize('US/Pacific', errors='coerce')

def test_tz_localize_roundtrip(self):
for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
for t in ['2014-02-01 09:00', '2014-07-08 09:00',
Expand Down
25 changes: 20 additions & 5 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ class Timestamp(_Timestamp):
def is_year_end(self):
return self._get_start_end_field('is_year_end')

def tz_localize(self, tz, ambiguous='raise'):
def tz_localize(self, tz, ambiguous='raise', errors='raise'):
"""
Convert naive Timestamp to local time zone, or remove
timezone from tz-aware Timestamp.
Expand All @@ -475,6 +475,14 @@ class Timestamp(_Timestamp):
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
errors : 'raise', 'coerce', default 'raise'
- 'raise' will raise a NonExistentTimeError if a timestamp is not
valid in the specified timezone (e.g. due to a transition from
or to DST time)
- 'coerce' will return NaT if the timestamp can not be converted
into the specified timezone

.. versionadded:: 0.18.2

Returns
-------
Expand All @@ -494,7 +502,7 @@ class Timestamp(_Timestamp):
if not isinstance(ambiguous, basestring):
ambiguous = [ambiguous]
value = tz_localize_to_utc(np.array([self.value],dtype='i8'), tz,
ambiguous=ambiguous)[0]
ambiguous=ambiguous, errors=errors)[0]
return Timestamp(value, tz=tz)
else:
if tz is None:
Expand Down Expand Up @@ -3943,7 +3951,8 @@ cpdef ndarray _unbox_utcoffsets(object transinfo):

@cython.boundscheck(False)
@cython.wraparound(False)
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
object errors='raise'):
"""
Localize tzinfo-naive DateRange to given time zone (using pytz). If
there are ambiguities in the values, raise AmbiguousTimeError.
Expand All @@ -3960,9 +3969,12 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
ndarray[int64_t] result, result_a, result_b, dst_hours
pandas_datetimestruct dts
bint infer_dst = False, is_dst = False, fill = False
bint is_coerce = errors == 'coerce', is_raise = errors == 'raise'

# Vectorized version of DstTzInfo.localize

assert is_coerce or is_raise

if not have_pytz:
raise Exception("Could not find pytz module")

Expand Down Expand Up @@ -4092,8 +4104,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None):
elif right != NPY_NAT:
result[i] = right
else:
stamp = Timestamp(vals[i])
raise pytz.NonExistentTimeError(stamp)
if is_coerce:
result[i] = NPY_NAT
else:
stamp = Timestamp(vals[i])
raise pytz.NonExistentTimeError(stamp)

return result

Expand Down