diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 71bb8f79642dc..5baa26c5831c8 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -8,7 +8,7 @@ import time from cython import Py_ssize_t -from cpython.datetime cimport datetime +from cpython.datetime cimport datetime, tzinfo import numpy as np @@ -35,6 +35,8 @@ from nattype import nat_strings, NaT # ---------------------------------------------------------------------- # Constants +_default_tzinfos = {} + class DateParseError(ValueError): pass @@ -51,7 +53,7 @@ cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} def parse_datetime_string(date_string, freq=None, dayfirst=False, - yearfirst=False, **kwargs): + yearfirst=False, tzinfos=None, **kwargs): """parse datetime string, only returns datetime. Also cares special handling matching time patterns. @@ -66,10 +68,13 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') + if tzinfos is None: + tzinfos = _default_tzinfos + if _TIMEPAT.match(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME dt = du_parse(date_string, dayfirst=dayfirst, - yearfirst=yearfirst, **kwargs) + yearfirst=yearfirst, tzinfos=tzinfos, **kwargs) return dt try: @@ -82,7 +87,8 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, try: dt = du_parse(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + dayfirst=dayfirst, yearfirst=yearfirst, + tzinfos=tzinfos, **kwargs) except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable @@ -132,7 +138,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, - yearfirst=False): + yearfirst=False, tzinfos=None): """parse datetime 
string, only returns datetime Returns @@ -153,6 +159,9 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') + if tzinfos is None: + tzinfos = _default_tzinfos + try: return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) except DateParseError: @@ -163,7 +172,7 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, try: parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, dayfirst=dayfirst, yearfirst=yearfirst, - ignoretz=False, tzinfos=None) + ignoretz=False, tzinfos=tzinfos) except Exception as e: # TODO: allow raise of errors within instead raise DateParseError(e) @@ -305,8 +314,12 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, object reso = None dict repl = {} + if tzinfos is None: + tzinfos = _default_tzinfos + fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst) + res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst, + tzinfos=tzinfos) # dateutil 2.2 compat if isinstance(res, tuple): # PyTuple_Check @@ -342,16 +355,18 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) - if isinstance(tzdata, datetime.tzinfo): - tzinfo = tzdata + + if isinstance(tzdata, tzinfo): + tzobj = tzdata elif isinstance(tzdata, (str, unicode)): - tzinfo = _dateutil_tzstr(tzdata) + tzobj = _dateutil_tzstr(tzdata) elif isinstance(tzdata, int): - tzinfo = tzoffset(res.tzname, tzdata) + tzobj = tzoffset(res.tzname, tzdata) else: raise ValueError("offset must be tzinfo subclass, " "tz string, or int offset") - ret = ret.replace(tzinfo=tzinfo) + + ret = ret.replace(tzinfo=tzobj) elif res.tzname and res.tzname in time.tzname: ret = ret.replace(tzinfo=_dateutil_tzlocal()) elif res.tzoffset == 0: diff --git 
def tz_validator(val):
    """
    Validate a candidate value for the ``tslib.tzinfos`` option and mirror
    it into ``_default_tzinfos``.

    Parameters
    ----------
    val : object
        Candidate option value.  It is accepted only when it is a ``dict``
        whose keys are all strings (``string_types``) and whose values are
        all ``tzinfo`` instances.

    Raises
    ------
    ValueError
        If ``val`` is not a dict, or if any key is not a string, or if any
        value is not a ``tzinfo`` instance.

    Notes
    -----
    On success the contents of ``_default_tzinfos`` are replaced in place
    (cleared, then updated from ``val``), so the validator has a side
    effect in addition to checking its argument.
    """
    message = ("value passed to set_option('tslib.tzinfos') must be a "
               "dictionary with string keys and tzinfo values")

    if not isinstance(val, dict):
        raise ValueError(message)

    # Reject on the first entry that is not a (string, tzinfo) pair.
    for name, zone in val.items():
        if not (isinstance(name, string_types) and isinstance(zone, tzinfo)):
            raise ValueError(message)

    # TODO: Should this be done elsewhere?
    # NOTE(review): if ``val`` is the ``_default_tzinfos`` object itself,
    # the clear() below empties it before update() reads from it, so the
    # mapping ends up empty -- confirm whether that is intended.
    _default_tzinfos.clear()
    _default_tzinfos.update(val)