diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ce1035e91391a..a07991d69d48b 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -71,6 +71,23 @@ Resample: ts.resample('D', how='mean') +.. _timeseries.overview: + +Overview +-------- + +Following table shows the type of time-related classes pandas can handle and +how to create them. + +================= ============================== ================================================== +Class Remarks How to create +================= ============================== ================================================== +``Timestamp`` Represents a single time stamp ``to_datetime``, ``Timestamp`` +``DatetimeIndex`` Index of ``Timestamps`` ``to_datetime``, ``date_range``, ``DatetimeIndex`` +``Period`` Represents a single time span ``Period`` +``PeriodIndex`` Index of ``Period`` ``period_range``, ``PeriodIndex`` +================= ============================== ================================================== + .. _timeseries.representation: Time Stamps vs. Time Spans @@ -78,30 +95,45 @@ Time Stamps vs. Time Spans Time-stamped data is the most basic type of timeseries data that associates values with points in time. For pandas objects it means using the points in -time to create the index +time. .. ipython:: python - dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)] - ts = Series(np.random.randn(3), dates) - - type(ts.index) - - ts + Timestamp(datetime(2012, 5, 1)) + Timestamp('2012-05-01') However, in many cases it is more natural to associate things like change -variables with a time span instead. +variables with a time span instead. The span represented by ``Period`` can be +specified explicitly, or inferred from datetime string format. For example: .. ipython:: python - periods = PeriodIndex([Period('2012-01'), Period('2012-02'), - Period('2012-03')]) + Period('2011-01') + + Period('2012-05', freq='D') + +``Timestamp`` and ``Period`` can be the index. 
Lists of ``Timestamp`` and +``Period`` are automatically coerced to ``DatetimeIndex`` and ``PeriodIndex`` +respectively. + +.. ipython:: python + + dates = [Timestamp('2012-05-01'), Timestamp('2012-05-02'), Timestamp('2012-05-03')] + ts = Series(np.random.randn(3), dates) + + type(ts.index) + ts.index + + ts + + periods = [Period('2012-01'), Period('2012-02'), Period('2012-03')] ts = Series(np.random.randn(3), periods) type(ts.index) + ts.index ts @@ -150,6 +182,17 @@ you can pass the ``dayfirst`` flag: considerably and on versions later then 0.13.0 explicitly specifying a format string of '%Y%m%d' takes a faster path still. +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. +Also, ``Timestamp`` can accept the string input. +Note that ``Timestamp`` doesn't accept string parsing options like ``dayfirst`` +or ``format``, use ``to_datetime`` if these are required. + +.. ipython:: python + + to_datetime('2010/11/12') + + Timestamp('2010/11/12') + Invalid Data ~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 05b69bae42c28..dbe07d74854e0 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -33,6 +33,45 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ +- ``DatetimeIndex`` can be instantiated using strings containing ``NaT`` (:issue:`7599`) +- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent (:issue:`7599`) + + Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, whereas ``DatetimeIndex`` uses the beginning of the year. + ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex`` can parse, such as quarterly string. + + Previous Behavior + + .. code-block:: python + + In [1]: Timestamp('2012Q2') + Traceback + ... + ValueError: Unable to parse 2012Q2 + + # Results in today's date. 
+ In [2]: Timestamp('2014') + Out [2]: 2014-08-12 00:00:00 + + v0.17.0 can parse them as below. It works on ``DatetimeIndex`` also. + + New Behaviour + + .. ipython:: python + + Timestamp('2012Q2') + Timestamp('2014') + DatetimeIndex(['2012Q2', '2014']) + + .. note:: If you want to perform calculations based on today's date, use ``Timestamp.now()`` and ``pandas.tseries.offsets``. + + .. ipython:: python + + import pandas.tseries.offsets as offsets + Timestamp.now() + Timestamp.now() + offsets.DateOffset(years=1) + +- ``to_datetime`` can now accept ``yearfirst`` keyword (:issue:`7599`) + - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7d4c9df64c0bb..f6a487664046c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2047,8 +2047,9 @@ def _make_date_converter(date_parser=None, dayfirst=False, def converter(*date_cols): if date_parser is None: strs = _concat_date_cols(date_cols) + try: - return tools.to_datetime( + return tools._to_datetime( com._ensure_object(strs), utc=None, box=False, diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 9fb06d2854b11..85de5e083d6d9 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -314,14 +314,12 @@ def _get_freq_str(base, mult=1): } need_suffix = ['QS', 'BQ', 'BQS', 'AS', 'BA', 'BAS'] -_months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', - 'OCT', 'NOV', 'DEC'] for __prefix in need_suffix: - for _m in _months: + for _m in tslib._MONTHS: _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ _offset_to_period_map[__prefix] for __prefix in ['A', 'Q']: - for _m in _months: + for _m in tslib._MONTHS: _alias = '%s-%s' % (__prefix, _m) 
_offset_to_period_map[_alias] = _alias @@ -1188,12 +1186,7 @@ def is_superperiod(source, target): return target in ['N'] -def _get_rule_month(source, default='DEC'): - source = source.upper() - if '-' not in source: - return default - else: - return source.split('-')[1] +_get_rule_month = tslib._get_rule_month def _is_annual(rule): @@ -1224,15 +1217,10 @@ def _is_weekly(rule): DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] -MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', - 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] - -_month_numbers = dict((k, i) for i, k in enumerate(MONTHS)) - - +MONTHS = tslib._MONTHS +_month_numbers = tslib._MONTH_NUMBERS +_month_aliases = tslib._MONTH_ALIASES _weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS)) -_month_aliases = dict((k + 1, v) for k, v in enumerate(MONTHS)) - def _is_multiple(us, mult): return us % mult == 0 diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a549c44d119c7..a8b6fb4389459 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -239,8 +239,9 @@ def __new__(cls, data=None, # try a few ways to make it datetime64 if lib.is_string_array(data): - data = _str_to_dt_array(data, freq, dayfirst=dayfirst, - yearfirst=yearfirst) + data = tslib.parse_str_array_to_datetime(data, freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst) else: data = tools.to_datetime(data, errors='raise') data.offset = freq @@ -254,8 +255,9 @@ def __new__(cls, data=None, return data if issubclass(data.dtype.type, compat.string_types): - data = _str_to_dt_array(data, freq, dayfirst=dayfirst, - yearfirst=yearfirst) + data = tslib.parse_str_array_to_datetime(data, freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst) if issubclass(data.dtype.type, np.datetime64): if isinstance(data, ABCSeries): @@ -288,8 +290,9 @@ def __new__(cls, data=None, values = data if lib.is_string_array(values): - subarr = _str_to_dt_array(values, freq, dayfirst=dayfirst, - yearfirst=yearfirst) + subarr = 
tslib.parse_str_array_to_datetime(values, freq=freq, dayfirst=dayfirst, + yearfirst=yearfirst) + else: try: subarr = tools.to_datetime(data, box=False) @@ -298,11 +301,11 @@ def __new__(cls, data=None, if isinstance(subarr, ABCSeries): subarr = subarr.values if subarr.dtype == np.object_: - subarr = tools.to_datetime(subarr, box=False) + subarr = tools._to_datetime(subarr, box=False) except ValueError: # tz aware - subarr = tools.to_datetime(data, box=False, utc=True) + subarr = tools._to_datetime(data, box=False, utc=True) if not np.issubdtype(subarr.dtype, np.datetime64): raise ValueError('Unable to convert %s to datetime dtype' @@ -332,7 +335,7 @@ def __new__(cls, data=None, if inferred != freq.freqstr: on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz) if not np.array_equal(subarr.asi8, on_freq.asi8): - raise ValueError('Inferred frequency {0} from passed dates does not' + raise ValueError('Inferred frequency {0} from passed dates does not ' 'conform to passed frequency {1}'.format(inferred, freq.freqstr)) if freq_infer: @@ -534,7 +537,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, xdr = generate_range(offset=offset, start=_CACHE_START, end=_CACHE_END) - arr = tools.to_datetime(list(xdr), box=False) + arr = tools._to_datetime(list(xdr), box=False) cachedRange = DatetimeIndex._simple_new(arr) cachedRange.offset = offset @@ -1926,17 +1929,6 @@ def _to_m8(key, tz=None): return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE) -def _str_to_dt_array(arr, offset=None, dayfirst=None, yearfirst=None): - def parser(x): - result = parse_time_string(x, offset, dayfirst=dayfirst, - yearfirst=yearfirst) - return result[0] - - arr = np.asarray(arr, dtype=object) - data = _algos.arrmap_object(arr, parser) - return tools.to_datetime(data) - - _CACHE_START = Timestamp(datetime(1950, 1, 1)) _CACHE_END = Timestamp(datetime(2030, 1, 1)) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 
3a69a13739e5d..941456fa07cfa 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1508,22 +1508,7 @@ def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 return BMonthEnd().onOffset(dt) and modMonth == 0 - -_int_to_month = { - 1: 'JAN', - 2: 'FEB', - 3: 'MAR', - 4: 'APR', - 5: 'MAY', - 6: 'JUN', - 7: 'JUL', - 8: 'AUG', - 9: 'SEP', - 10: 'OCT', - 11: 'NOV', - 12: 'DEC' -} - +_int_to_month = tslib._MONTH_ALIASES _month_to_int = dict((v, k) for k, v in _int_to_month.items()) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index d167982b5b0bd..c8b96076b26bd 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1432,6 +1432,25 @@ def test_dti_constructor_preserve_dti_freq(self): rng2 = DatetimeIndex(rng) self.assertEqual(rng.freq, rng2.freq) + def test_dti_constructor_years_only(self): + # GH 6961 + for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: + rng1 = date_range('2014', '2015', freq='M', tz=tz) + expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) + + rng2 = date_range('2014', '2015', freq='MS', tz=tz) + expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', tz=tz) + + rng3 = date_range('2014', '2020', freq='A', tz=tz) + expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) + + rng4 = date_range('2014', '2020', freq='AS', tz=tz) + expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', tz=tz) + + for rng, expected in [(rng1, expected1), (rng2, expected2), + (rng3, expected3), (rng4, expected4)]: + tm.assert_index_equal(rng, expected) + def test_normalize(self): rng = date_range('1/1/2000 9:30', periods=10, freq='D') @@ -2146,6 +2165,15 @@ def test_constructor_coverage(self): from_ints = DatetimeIndex(expected.asi8) self.assertTrue(from_ints.equals(expected)) + # string with NaT + strings = np.array(['2000-01-01', '2000-01-02', 'NaT']) + result = DatetimeIndex(strings) + 
expected = DatetimeIndex(strings.astype('O')) + self.assertTrue(result.equals(expected)) + + from_ints = DatetimeIndex(expected.asi8) + self.assertTrue(from_ints.equals(expected)) + # non-conforming self.assertRaises(ValueError, DatetimeIndex, ['2000-01-01', '2000-01-02', '2000-01-04'], diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 341450f504e2a..397d3f7d2656f 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -6,13 +6,15 @@ import pandas._period as period import datetime -from pandas.core.api import Timestamp, Series, Timedelta, Period +from pandas.core.api import Timestamp, Series, Timedelta, Period, to_datetime from pandas.tslib import get_timezone from pandas._period import period_asfreq, period_ordinal -from pandas.tseries.index import date_range +from pandas.tseries.index import date_range, DatetimeIndex from pandas.tseries.frequencies import get_freq +import pandas.tseries.tools as tools import pandas.tseries.offsets as offsets import pandas.util.testing as tm +import pandas.compat as compat from pandas.util.testing import assert_series_equal import pandas.compat as compat @@ -416,6 +418,7 @@ def test_nat_fields(self): class TestDatetimeParsingWrappers(tm.TestCase): + def test_does_not_convert_mixed_integer(self): bad_date_strings = ( '-50000', @@ -444,6 +447,179 @@ def test_does_not_convert_mixed_integer(self): tslib._does_string_look_like_datetime(good_date_string) ) + def test_parsers(self): + cases = {'2011-01-01': datetime.datetime(2011, 1, 1), + '2Q2005': datetime.datetime(2005, 4, 1), + '2Q05': datetime.datetime(2005, 4, 1), + '2005Q1': datetime.datetime(2005, 1, 1), + '05Q1': datetime.datetime(2005, 1, 1), + '2011Q3': datetime.datetime(2011, 7, 1), + '11Q3': datetime.datetime(2011, 7, 1), + '3Q2011': datetime.datetime(2011, 7, 1), + '3Q11': datetime.datetime(2011, 7, 1), + + # quarterly without space + '2000Q4': datetime.datetime(2000, 10, 1), + '00Q4': 
datetime.datetime(2000, 10, 1), + '4Q2000': datetime.datetime(2000, 10, 1), + '4Q00': datetime.datetime(2000, 10, 1), + '2000q4': datetime.datetime(2000, 10, 1), + + '2000-Q4': datetime.datetime(2000, 10, 1), + '00-Q4': datetime.datetime(2000, 10, 1), + '4Q-2000': datetime.datetime(2000, 10, 1), + '4Q-00': datetime.datetime(2000, 10, 1), + + '2000q4': datetime.datetime(2000, 10, 1), + '00q4': datetime.datetime(2000, 10, 1), + + '2005': datetime.datetime(2005, 1, 1), + '2005-11': datetime.datetime(2005, 11, 1), + '2005 11': datetime.datetime(2005, 11, 1), + '11-2005': datetime.datetime(2005, 11, 1), + '11 2005': datetime.datetime(2005, 11, 1), + '200511': datetime.datetime(2020, 5, 11), + '20051109': datetime.datetime(2005, 11, 9), + + '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15), + '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0), + + '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15), + '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0), + '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15), + '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0), + + "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10, 36, 28), + "Thu Sep 25 2003": datetime.datetime(2003, 9, 25), + "Sep 25 2003": datetime.datetime(2003, 9, 25), + "January 1 2014": datetime.datetime(2014, 1, 1), + + # GH 10537 + '2014-06': datetime.datetime(2014, 6, 1), + '06-2014': datetime.datetime(2014, 6, 1), + '2014-6': datetime.datetime(2014, 6, 1), + '6-2014': datetime.datetime(2014, 6, 1), + } + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = to_datetime(np.array([date_str], dtype=object)) + result5 = Timestamp(date_str) + result6 = DatetimeIndex([date_str])[0] + result7 = date_range(date_str, freq='S', periods=1) + self.assertEqual(result1, expected) + self.assertEqual(result2, expected) + self.assertEqual(result3, 
expected) + self.assertEqual(result4, expected) + self.assertEqual(result5, expected) + self.assertEqual(result6, expected) + self.assertEqual(result7, expected) + + # NaT + result1, _, _ = tools.parse_time_string('NaT') + result2 = to_datetime('NaT') + result3 = Timestamp('NaT') + result4 = DatetimeIndex(['NaT'])[0] + self.assertTrue(result1 is tslib.NaT) + self.assertTrue(result1 is tslib.NaT) + self.assertTrue(result1 is tslib.NaT) + self.assertTrue(result1 is tslib.NaT) + + def test_parsers_quarter_invalid(self): + + cases = ['2Q 2005', '2Q-200A', '2Q-200', + '22Q2005', '6Q-20', '2Q200.'] + for case in cases: + self.assertRaises(ValueError, tools.parse_time_string, case) + + def test_parsers_dayfirst_yearfirst(self): + # str : dayfirst, yearfirst, expected + cases = {'10-11-12': [(False, False, datetime.datetime(2012, 10, 11)), + (True, False, datetime.datetime(2012, 11, 10)), + (False, True, datetime.datetime(2010, 11, 12)), + (True, True, datetime.datetime(2010, 11, 12))], + '20/12/21': [(False, False, datetime.datetime(2021, 12, 20)), + (True, False, datetime.datetime(2021, 12, 20)), + (False, True, datetime.datetime(2020, 12, 21)), + (True, True, datetime.datetime(2020, 12, 21))]} + + tm._skip_if_no_dateutil() + from dateutil.parser import parse + for date_str, values in compat.iteritems(cases): + for dayfirst, yearfirst ,expected in values: + result1, _, _ = tools.parse_time_string(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + + result2 = to_datetime(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + + result3 = DatetimeIndex([date_str], dayfirst=dayfirst, + yearfirst=yearfirst)[0] + + # Timestamp doesn't support dayfirst and yearfirst + + self.assertEqual(result1, expected) + self.assertEqual(result2, expected) + self.assertEqual(result3, expected) + + # compare with dateutil result + dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst) + self.assertEqual(dateutil_result, expected) + + def test_parsers_timestring(self): + 
tm._skip_if_no_dateutil() + from dateutil.parser import parse + + # must be the same as dateutil result + cases = {'10:15': (parse('10:15'), datetime.datetime(1, 1, 1, 10, 15)), + '9:05': (parse('9:05'), datetime.datetime(1, 1, 1, 9, 5)) } + + for date_str, (exp_now, exp_def) in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = Timestamp(date_str) + result5 = DatetimeIndex([date_str])[0] + # parse time string return time string based on default date + # others are not, and can't be changed because it is used in + # time series plot + self.assertEqual(result1, exp_def) + self.assertEqual(result2, exp_now) + self.assertEqual(result3, exp_now) + self.assertEqual(result4, exp_now) + self.assertEqual(result5, exp_now) + + def test_parsers_monthfreq(self): + cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0), + '200005': datetime.datetime(2000, 5, 1, 0, 0)} + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, freq='M') + result2 = tools._to_datetime(date_str, freq='M') + self.assertEqual(result1, expected) + self.assertEqual(result2, expected) + + def test_parsers_quarterly_with_freq(self): + + msg = 'Incorrect quarterly string is given, quarter must be between 1 and 4: 2013Q5' + with tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q5') + + # GH 5418 + msg = 'Unable to retrieve month information from given freq: INVLD-L-DEC-SAT' + with tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') + + cases = {('2013Q2', None): datetime.datetime(2013, 4, 1), + ('2013Q2', 'A-APR'): datetime.datetime(2012, 8, 1), + ('2013-Q2', 'A-DEC'): datetime.datetime(2013, 4, 1)} + + for (date_str, freq), exp in compat.iteritems(cases): + result, _, _ = tools.parse_time_string(date_str, freq=freq) + self.assertEqual(result, exp) + class 
TestArrayToDatetime(tm.TestCase): def test_parsing_valid_dates(self): diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 65fe3420f670c..5ff6a48981ceb 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -12,9 +12,6 @@ try: import dateutil - from dateutil.parser import parse, DEFAULTPARSER - from dateutil.relativedelta import relativedelta - # raise exception if dateutil 2.0 install on 2.x platform if (sys.version_info[0] == 2 and dateutil.__version__ == '2.0'): # pragma: no cover @@ -173,9 +170,10 @@ def _guess_datetime_format_for_array(arr, **kwargs): if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) -def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, - format=None, exact=True, coerce=False, unit='ns', - infer_datetime_format=False): + +def to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, + utc=None, box=True, format=None, exact=True, coerce=False, + unit='ns', infer_datetime_format=False): """ Convert argument to datetime. @@ -183,19 +181,26 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, ---------- arg : string, datetime, array of strings (with possible NAs) errors : {'ignore', 'raise'}, default 'ignore' - Errors are ignored by default (values left untouched) + Errors are ignored by default (values left untouched). dayfirst : boolean, default False - If True parses dates with the day first, eg 20/01/2005 + Specify a date parse order if `arg` is str or its list-likes. + If True, parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10. Warning: dayfirst=True is not strict, but will prefer to parse - with day first (this is a known bug). + with day first (this is a known bug, based on dateutil behavior). + yearfirst : boolean, default False + Specify a date parse order if `arg` is str or its list-likes. + If True parses dates with the year first, eg 10/11/12 is parsed as 2010-11-12. 
+ If both dayfirst and yearfirst are True, yearfirst takes precedence (same as dateutil). + Warning: yearfirst=True is not strict, but will prefer to parse + with year first (this is a known bug, based on dateutil behavior). utc : boolean, default None Return UTC DatetimeIndex if True (converting any tz-aware - datetime.datetime objects as well) + datetime.datetime objects as well). box : boolean, default True - If True returns a DatetimeIndex, if False returns ndarray of values + If True returns a DatetimeIndex, if False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse - all the way up to nanoseconds + all the way up to nanoseconds. exact : boolean, True by default If True, require an exact format match. If False, allow the format to match anywhere in the target string. @@ -203,7 +208,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, Timestamps outside the interval between Timestamp.min and Timestamp.max (approximately 1677-09-22 to 2262-04-11) will be also forced to NaT. unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch - (e.g. a unix timestamp), which is an integer/float number + (e.g. a unix timestamp), which is an integer/float number. infer_datetime_format : boolean, default False If no `format` is given, try to infer the format based on the first datetime string. Provides a large speed-up in many cases. 
@@ -254,7 +259,18 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, >>> pd.to_datetime('13000101', format='%Y%m%d', coerce=True) NaT """ - from pandas import Timestamp + return _to_datetime(arg, errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, + utc=utc, box=box, format=format, exact=exact, coerce=coerce, + unit=unit, infer_datetime_format=infer_datetime_format) + + +def _to_datetime(arg, errors='ignore', dayfirst=False, yearfirst=False, + utc=None, box=True, format=None, exact=True, coerce=False, + unit='ns', freq=None, infer_datetime_format=False): + """ + Same as to_datetime, but accept freq for + DatetimeIndex internal construction + """ from pandas.core.series import Series from pandas.tseries.index import DatetimeIndex @@ -326,6 +342,7 @@ def _convert_listlike(arg, box, format): if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime(arg, raise_=errors == 'raise', utc=utc, dayfirst=dayfirst, + yearfirst=yearfirst, freq=freq, coerce=coerce, unit=unit) if com.is_datetime64_dtype(result) and box: @@ -341,7 +358,7 @@ def _convert_listlike(arg, box, format): if arg is None: return arg - elif isinstance(arg, Timestamp): + elif isinstance(arg, tslib.Timestamp): return arg elif isinstance(arg, Series): values = _convert_listlike(arg.values, False, format) @@ -351,8 +368,6 @@ def _convert_listlike(arg, box, format): return _convert_listlike(np.array([ arg ]), box, format)[0] -class DateParseError(ValueError): - pass def _attempt_YYYYMMDD(arg, coerce): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, @@ -392,14 +407,6 @@ def calc_with_mask(carg,mask): return None -# patterns for quarters like '4Q2005', '05Q1' -qpat1full = re.compile(r'(\d)Q-?(\d\d\d\d)') -qpat2full = re.compile(r'(\d\d\d\d)-?Q(\d)') -qpat1 = re.compile(r'(\d)Q-?(\d\d)') -qpat2 = re.compile(r'(\d\d)-?Q(\d)') -ypat = re.compile(r'(\d\d\d\d)$') -has_time = re.compile('(.+)([\s]|T)+(.+)') - def 
parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): """ @@ -421,183 +428,19 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): datetime, datetime/dateutil.parser._result, str """ from pandas.core.config import get_option - from pandas.tseries.offsets import DateOffset - from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, - _get_freq_str) - if not isinstance(arg, compat.string_types): return arg - arg = arg.upper() - - default = datetime(1, 1, 1).replace(hour=0, minute=0, - second=0, microsecond=0) - - # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 - if len(arg) in [4, 5, 6, 7]: - m = ypat.match(arg) - if m: - ret = default.replace(year=int(m.group(1))) - return ret, ret, 'year' - - add_century = False - if len(arg) > 5: - qpats = [(qpat1full, 1), (qpat2full, 0)] - else: - add_century = True - qpats = [(qpat1, 1), (qpat2, 0)] - - for pat, yfirst in qpats: - qparse = pat.match(arg) - if qparse is not None: - if yfirst: - yi, qi = 1, 2 - else: - yi, qi = 2, 1 - q = int(qparse.group(yi)) - y_str = qparse.group(qi) - y = int(y_str) - if add_century: - y += 2000 - - if freq is not None: - # hack attack, #1228 - mnum = _month_numbers[_get_rule_month(freq)] + 1 - month = (mnum + (q - 1) * 3) % 12 + 1 - if month > mnum: - y -= 1 - else: - month = (q - 1) * 3 + 1 - - ret = default.replace(year=y, month=month) - return ret, ret, 'quarter' - - is_mo_str = freq is not None and freq == 'M' - is_mo_off = getattr(freq, 'rule_code', None) == 'M' - is_monthly = is_mo_str or is_mo_off - if len(arg) == 6 and is_monthly: - try: - ret = _try_parse_monthly(arg) - if ret is not None: - return ret, ret, 'month' - except Exception: - pass - - # montly f7u12 - mresult = _attempt_monthly(arg) - if mresult: - return mresult - if dayfirst is None: dayfirst = get_option("display.date_dayfirst") if yearfirst is None: yearfirst = get_option("display.date_yearfirst") - try: - parsed, reso = dateutil_parse(arg, default, 
dayfirst=dayfirst, - yearfirst=yearfirst) - except Exception as e: - # TODO: allow raise of errors within instead - raise DateParseError(e) - - if parsed is None: - raise DateParseError("Could not parse %s" % arg) - - return parsed, parsed, reso # datetime, resolution - - -def dateutil_parse(timestr, default, - ignoretz=False, tzinfos=None, - **kwargs): - """ lifted from dateutil to get resolution""" - from dateutil import tz - import time - fobj = StringIO(str(timestr)) - - res = DEFAULTPARSER._parse(fobj, **kwargs) - - # dateutil 2.2 compat - if isinstance(res, tuple): - res, _ = res - - if res is None: - raise ValueError("unknown string format") - - repl = {} - reso = None - for attr in ["year", "month", "day", "hour", - "minute", "second", "microsecond"]: - value = getattr(res, attr) - if value is not None: - repl[attr] = value - reso = attr - - if reso is None: - raise ValueError("Cannot parse date.") - - if reso == 'microsecond': - if repl['microsecond'] == 0: - reso = 'second' - elif repl['microsecond'] % 1000 == 0: - reso = 'millisecond' - - ret = default.replace(**repl) - if res.weekday is not None and not res.day: - ret = ret + relativedelta.relativedelta(weekday=res.weekday) - if not ignoretz: - if callable(tzinfos) or tzinfos and res.tzname in tzinfos: - if callable(tzinfos): - tzdata = tzinfos(res.tzname, res.tzoffset) - else: - tzdata = tzinfos.get(res.tzname) - if isinstance(tzdata, datetime.tzinfo): - tzinfo = tzdata - elif isinstance(tzdata, compat.string_types): - tzinfo = tz.tzstr(tzdata) - elif isinstance(tzdata, int): - tzinfo = tz.tzoffset(res.tzname, tzdata) - else: - raise ValueError("offset must be tzinfo subclass, " - "tz string, or int offset") - ret = ret.replace(tzinfo=tzinfo) - elif res.tzname and res.tzname in time.tzname: - ret = ret.replace(tzinfo=tz.tzlocal()) - elif res.tzoffset == 0: - ret = ret.replace(tzinfo=tz.tzutc()) - elif res.tzoffset: - ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) - return ret, reso - - 
-def _attempt_monthly(val): - pats = ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y'] - for pat in pats: - try: - ret = datetime.strptime(val, pat) - return ret, ret, 'month' - except Exception: - pass - - -def _try_parse_monthly(arg): - base = 2000 - add_base = False - default = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, - microsecond=0) - - if len(arg) == 4: - add_base = True - y = int(arg[:2]) - m = int(arg[2:4]) - elif len(arg) >= 6: # 201201 - y = int(arg[:4]) - m = int(arg[4:6]) - if add_base: - y += base - ret = default.replace(year=y, month=m) - return ret + return tslib.parse_datetime_string_with_reso(arg, freq=freq, dayfirst=dayfirst, + yearfirst=yearfirst) +DateParseError = tslib.DateParseError normalize_date = tslib.normalize_date diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 27cd5e89220a9..8dc7fe824247b 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -43,17 +43,22 @@ cimport cython from datetime import timedelta, datetime from datetime import time as datetime_time +import re + # dateutil compat from dateutil.tz import (tzoffset, tzlocal as _dateutil_tzlocal, tzfile as _dateutil_tzfile, - tzutc as _dateutil_tzutc) + tzutc as _dateutil_tzutc, tzstr as _dateutil_tzstr) + from pandas.compat import is_platform_windows if is_platform_windows(): from dateutil.zoneinfo import gettz as _dateutil_gettz else: from dateutil.tz import gettz as _dateutil_gettz +from dateutil.relativedelta import relativedelta +from dateutil.parser import DEFAULTPARSER from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo -from pandas.compat import parse_date, string_types, iteritems +from pandas.compat import parse_date, string_types, iteritems, StringIO import operator import collections @@ -219,8 +224,22 @@ class Timestamp(_Timestamp): and is interchangable with it in most cases. It's the type used for the entries that make up a DatetimeIndex, and other timeseries oriented data structures in pandas. 
+ + Parameters + ---------- + ts_input : datetime-like, str, int, float + Value to be converted to Timestamp + offset : str, DateOffset + Offset which Timestamp will have + tz : string, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will have. + unit : string + numpy unit used for conversion, if ts_input is int or float """ + # Do not add ``dayfirst`` and ``yearfirst`` to Timestamp based on the discussion + # https://github.com/pydata/pandas/pull/7599 + @classmethod def fromordinal(cls, ordinal, offset=None, tz=None): """ passed an ordinal, translate and convert to a ts @@ -1079,40 +1098,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit): obj = _TSObject() if util.is_string_object(ts): - if ts in _nat_strings: - ts = NaT - elif ts == 'now': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns utc - ts = Timestamp.now(tz) - elif ts == 'today': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns a normalized datetime - ts = Timestamp.today(tz) - else: - try: - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) - obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts) - _check_dts_bounds(&obj.dts) - if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') - if tz is None: - _check_dts_bounds(&obj.dts) - return obj - else: - # Keep the converter same as PyDateTime's - ts = Timestamp(obj.value, tz=obj.tzinfo) - else: - ts = obj.value - if tz is not None: - # shift for _localize_tso - ts = tz_convert_single(ts, tz, 'UTC') - except ValueError: - try: - ts = parse_datetime_string(ts) - except Exception: - raise ValueError + return convert_str_to_tsobject(ts, tz, unit) if ts is None or ts is NaT or ts is np_NaT: obj.value = NPY_NAT @@ -1196,6 +1182,56 @@ cdef convert_to_tsobject(object ts, object tz, object unit): return obj + +cpdef 
convert_str_to_tsobject(object ts, object tz, object unit, + dayfirst=False, yearfirst=False): + cdef: + _TSObject obj + int out_local = 0, out_tzoffset = 0 + + if tz is not None: + tz = maybe_get_tz(tz) + + obj = _TSObject() + + if ts in _nat_strings: + ts = NaT + elif ts == 'now': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns utc + ts = Timestamp.now(tz) + elif ts == 'today': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns a normalized datetime + ts = Timestamp.today(tz) + else: + try: + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) + obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts) + _check_dts_bounds(&obj.dts) + if out_local == 1: + obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') + if tz is None: + _check_dts_bounds(&obj.dts) + return obj + else: + # Keep the converter same as PyDateTime's + ts = Timestamp(obj.value, tz=obj.tzinfo) + else: + ts = obj.value + if tz is not None: + # shift for _localize_tso + ts = tz_convert_single(ts, tz, 'UTC') + except ValueError: + try: + ts = parse_datetime_string(ts, dayfirst=dayfirst, yearfirst=yearfirst) + except Exception: + raise ValueError + + return convert_to_tsobject(ts, tz, unit) + + cdef inline void _localize_tso(_TSObject obj, object tz): ''' Take a TSObject in UTC and localizes to timezone tz. 
class DateParseError(ValueError):
    """ValueError subclass raised by the date-string parsers below."""


# Matches strings that begin with an ``H:MM`` / ``HH:MM`` time of day.
cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')


def parse_datetime_string(object date_string, object freq=None,
                          dayfirst=False, yearfirst=False, **kwargs):
    """
    Parse a datetime string and return only the parsed value.

    Strings that begin with a time of day (``_TIMEPAT``) are handed straight
    to ``parse_date`` without ``_DEFAULT_DATETIME``.  Anything else is first
    tried against the abbreviated forms handled by ``_parse_dateabbr_string``
    and then falls back to ``parse_date`` with ``_DEFAULT_DATETIME`` as the
    default.

    Parameters
    ----------
    date_string : str
    freq : object, default None
        Forwarded to ``_parse_dateabbr_string``.
    dayfirst, yearfirst : bool, default False
        Forwarded to ``parse_date``.
    **kwargs
        Forwarded to ``parse_date``.

    Returns
    -------
    datetime
        (``NaT`` when ``date_string`` is one of ``_nat_strings``.)

    Raises
    ------
    ValueError
        If ``date_string`` does not look like a datetime.
    DateParseError
        Re-raised unchanged from ``_parse_dateabbr_string``.
    """

    cdef:
        object dt

    if not _does_string_look_like_datetime(date_string):
        raise ValueError('Given date string not likely a datetime.')

    if _TIMEPAT.match(date_string):
        # use current datetime as default, not pass _DEFAULT_DATETIME
        dt = parse_date(date_string, dayfirst=dayfirst,
                        yearfirst=yearfirst, **kwargs)
        return dt
    try:
        dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
        return dt
    except DateParseError:
        raise
    except ValueError:
        # Not an abbreviated form; fall through to the general parser.
        pass

    dt = parse_date(date_string, default=_DEFAULT_DATETIME,
                    dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
    return dt


def parse_datetime_string_with_reso(object date_string, object freq=None,
                                    dayfirst=False, yearfirst=False, **kwargs):
    """
    Parse a datetime string and also report the parsed resolution.

    Parameters
    ----------
    date_string : str
    freq : object, default None
        Forwarded to ``_parse_dateabbr_string``.
    dayfirst, yearfirst : bool, default False
        Forwarded to ``dateutil_parse``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    tuple
        ``(parsed, parsed, reso)``: the parsed value (twice) and the
        resolution string.

    Raises
    ------
    ValueError
        If ``date_string`` does not look like a datetime.
    DateParseError
        If ``_parse_dateabbr_string`` raises it, if ``dateutil_parse``
        raises anything, or if nothing could be parsed.
    """

    cdef:
        object parsed, reso

    if not _does_string_look_like_datetime(date_string):
        raise ValueError('Given date string not likely a datetime.')

    try:
        return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
    except DateParseError:
        raise
    except ValueError:
        # Not an abbreviated form; fall through to the general parser.
        pass

    try:
        parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
                                      dayfirst=dayfirst, yearfirst=yearfirst)
    except Exception as e:
        # TODO: allow raise of errors within instead
        raise DateParseError(e)
    if parsed is None:
        raise DateParseError("Could not parse %s" % date_string)
    return parsed, parsed, reso


cdef inline object _parse_dateabbr_string(object date_string, object default,
                                          object freq):
    """
    Parse year-only, quarterly and monthly abbreviations.

    Returns a 3-tuple ``(ret, ret, reso)`` where ``reso`` is ``'year'``,
    ``'quarter'`` or ``'month'`` (``(NaT, NaT, '')`` for ``_nat_strings``).
    Raises ``DateParseError`` for a quarter outside 1..4 or when the month
    cannot be derived from ``freq``; raises ``ValueError`` when no
    abbreviated form matches.
    """
    cdef:
        object ret
        int year, quarter, month, mnum, date_len

    # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1

    if date_string in _nat_strings:
        return NaT, NaT, ''

    date_string = date_string.upper()
    date_len = len(date_string)

    if date_len == 4:
        # parse year only like 2000
        try:
            ret = default.replace(year=int(date_string))
            return ret, ret, 'year'
        except ValueError:
            pass

    try:
        if 4 <= date_len <= 7:
            # ``index`` raises ValueError when there is no 'Q', which is
            # caught below and treated as "not a quarterly string".
            i = date_string.index('Q', 1, 6)
            if i == 1:
                quarter = int(date_string[0])
                if date_len == 4 or (date_len == 5 and date_string[i + 1] == '-'):
                    # r'(\d)Q-?(\d\d)')
                    year = 2000 + int(date_string[-2:])
                elif date_len == 6 or (date_len == 7 and date_string[i + 1] == '-'):
                    # r'(\d)Q-?(\d\d\d\d)')
                    year = int(date_string[-4:])
                else:
                    raise ValueError
            elif i == 2 or i == 3:
                # r'(\d\d)-?Q(\d)'
                if date_len == 4 or (date_len == 5 and date_string[i - 1] == '-'):
                    quarter = int(date_string[-1])
                    year = 2000 + int(date_string[:2])
                else:
                    raise ValueError
            elif i == 4 or i == 5:
                if date_len == 6 or (date_len == 7 and date_string[i - 1] == '-'):
                    # r'(\d\d\d\d)-?Q(\d)'
                    quarter = int(date_string[-1])
                    year = int(date_string[:4])
                else:
                    raise ValueError

            if not (1 <= quarter <= 4):
                msg = 'Incorrect quarterly string is given, quarter must be between 1 and 4: {0}'
                raise DateParseError(msg.format(date_string))

            if freq is not None:
                # hack attack, #1228
                try:
                    mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1
                except (KeyError, ValueError):
                    msg = 'Unable to retrieve month information from given freq: {0}'.format(freq)
                    raise DateParseError(msg)

                month = (mnum + (quarter - 1) * 3) % 12 + 1
                if month > mnum:
                    year -= 1
            else:
                month = (quarter - 1) * 3 + 1

            ret = default.replace(year=year, month=month)
            return ret, ret, 'quarter'

    except DateParseError:
        raise
    except ValueError:
        pass

    if date_len == 6 and (freq == 'M' or getattr(freq, 'rule_code', None) == 'M'):
        # The int() conversions live inside the ``try`` so that a
        # non-numeric slice falls through to the strptime patterns below
        # instead of aborting the whole function.
        try:
            year = int(date_string[:4])
            month = int(date_string[4:6])
            ret = default.replace(year=year, month=month)
            return ret, ret, 'month'
        except ValueError:
            pass

    for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']:
        try:
            ret = datetime.strptime(date_string, pat)
            return ret, ret, 'month'
        except ValueError:
            pass

    raise ValueError('Unable to parse {0}'.format(date_string))


def dateutil_parse(object timestr, object default, ignoretz=False,
                   tzinfos=None, **kwargs):
    """
    lifted from dateutil to get resolution

    Parameters
    ----------
    timestr : object
        Converted with ``str`` and fed to ``DEFAULTPARSER._parse``.
    default : datetime
        Supplies every field that was not parsed (via ``default.replace``).
    ignoretz : bool, default False
        If True, no time zone is attached to the result.
    tzinfos : callable or mapping, default None
        Resolves a parsed time zone name to a tzinfo instance, a tz string
        or an integer offset.
    **kwargs
        Forwarded to ``DEFAULTPARSER._parse``.

    Returns
    -------
    tuple
        ``(ret, reso)``: the parsed datetime and the name of the finest
        field that was parsed.

    Raises
    ------
    ValueError
        If nothing could be parsed, or ``tzinfos`` yields an unsupported
        value.
    """

    cdef:
        object fobj, res, attr, ret, tzdata
        object reso = None
        dict repl = {}

    # The module-level name ``datetime`` is the datetime *class* (it is
    # called as ``datetime(1, 1, 1)`` and used as ``datetime.strptime`` in
    # this file), so ``datetime.tzinfo`` is an attribute descriptor and not
    # a type usable with isinstance.  Import the base class explicitly.
    from datetime import tzinfo as _tzinfo_base

    fobj = StringIO(str(timestr))
    res = DEFAULTPARSER._parse(fobj, **kwargs)

    # dateutil 2.2 compat
    if isinstance(res, tuple):
        res, _ = res

    if res is None:
        raise ValueError("unknown string format")

    # Fields are visited from coarsest to finest, so ``reso`` ends up as the
    # finest field that was actually parsed.
    for attr in ["year", "month", "day", "hour",
                 "minute", "second", "microsecond"]:
        value = getattr(res, attr)
        if value is not None:
            repl[attr] = value
            reso = attr

    if reso is None:
        raise ValueError("Cannot parse date.")

    if reso == 'microsecond':
        if repl['microsecond'] == 0:
            reso = 'second'
        elif repl['microsecond'] % 1000 == 0:
            reso = 'millisecond'

    ret = default.replace(**repl)
    if res.weekday is not None and not res.day:
        ret = ret + relativedelta.relativedelta(weekday=res.weekday)
    if not ignoretz:
        if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
            if callable(tzinfos):
                tzdata = tzinfos(res.tzname, res.tzoffset)
            else:
                tzdata = tzinfos.get(res.tzname)
            if isinstance(tzdata, _tzinfo_base):
                tz_obj = tzdata
            elif isinstance(tzdata, string_types):
                tz_obj = _dateutil_tzstr(tzdata)
            elif isinstance(tzdata, int):
                tz_obj = tzoffset(res.tzname, tzdata)
            else:
                raise ValueError("offset must be tzinfo subclass, "
                                 "tz string, or int offset")
            ret = ret.replace(tzinfo=tz_obj)
        elif res.tzname and res.tzname in time.tzname:
            ret = ret.replace(tzinfo=_dateutil_tzlocal())
        elif res.tzoffset == 0:
            ret = ret.replace(tzinfo=_dateutil_tzutc())
        elif res.tzoffset:
            ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset))
    return ret, reso


# const for parsers

_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0)
_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
           'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
# 'JAN' -> 0 ... 'DEC' -> 11
_MONTH_NUMBERS = dict((k, i) for i, k in enumerate(_MONTHS))
# 1 -> 'JAN' ... 12 -> 'DEC'
_MONTH_ALIASES = dict((k + 1, v) for k, v in enumerate(_MONTHS))


cpdef object _get_rule_month(object source, object default='DEC'):
    """
    Return the second '-'-separated field of the upper-cased ``source``,
    or ``default`` when ``source`` contains no '-'.
    """
    # NOTE(review): ``source`` must provide ``.upper()``; a non-string freq
    # would raise AttributeError, which ``_parse_dateabbr_string`` does not
    # catch -- confirm callers always pass a string here.
    source = source.upper()
    if '-' not in source:
        return default
    else:
        return source.split('-')[1]


def parse_str_array_to_datetime(ndarray values, dayfirst=False,
                                yearfirst=False, object freq=None):
    """
    Shortcut to parse str array for quicker DatetimeIndex construction

    Parameters
    ----------
    values : ndarray
        Each element is passed to ``parse_datetime_string``.
    dayfirst, yearfirst : bool, default False
    freq : object, default None

    Returns
    -------
    ndarray[int64_t]
        The ``value`` of the ``_TSObject`` built from each parsed element.

    Raises
    ------
    ValueError
        If any element cannot be parsed.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, py_dt
        ndarray[int64_t] iresult
        _TSObject _ts

    iresult = np.empty(n, dtype='i8')

    for i in range(n):
        val = values[i]
        try:
            py_dt = parse_datetime_string(val, dayfirst=dayfirst,
                                          yearfirst=yearfirst, freq=freq)
        except Exception:
            # Same exception type as before; %r keeps the message safe for
            # any element type.
            raise ValueError('Unable to parse %r' % (val,))
        _ts = convert_to_tsobject(py_dt, None, None)
        iresult[i] = _ts.value

    return iresult

# Similar to Timestamp/datetime, this is a construction requirement for timedeltas
# we need to do object instantiation in python
# This will serve as a C extension type that