From 7dc9406dd469e027e5bc99035505c608cc55ac34 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 23 Apr 2016 10:31:52 -0400 Subject: [PATCH 1/2] ENH: allow construction of datetimes from columns in a DataFrame closes #8158 --- doc/source/timeseries.rst | 26 ++++- doc/source/whatsnew/v0.18.1.txt | 18 +++ pandas/tseries/tests/test_period.py | 10 -- pandas/tseries/tests/test_timeseries.py | 125 ++++++++++++++++++++ pandas/tseries/tools.py | 145 +++++++++++++++++++++++- 5 files changed, 308 insertions(+), 16 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 4035d016a8fc6..576d9a52011ca 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -189,9 +189,31 @@ or ``format``, use ``to_datetime`` if these are required. .. ipython:: python - to_datetime('2010/11/12') + pd.to_datetime('2010/11/12') - Timestamp('2010/11/12') + pd.Timestamp('2010/11/12') + +.. versionadded:: 0.18.1 + +You can also pass a ``DataFrame`` of integer or string columns to assemble into a ``Series`` +of ``Timestamps``. + +.. ipython:: python + + df = pd.DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [2, 3]}) + pd.to_datetime(df) + + +You can pass only the columns that you need to assemble. + +.. ipython:: python + + pd.to_datetime(df[['year', 'month', 'day']]) + +.. _whatsnew_0181.other: Invalid Data diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index c270f1b9fab86..f764c5b9b4180 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -69,6 +69,24 @@ Partial string indexing now matches on ``DateTimeIndex`` when part of a ``MultiI dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +.. _whatsnew_0181.enhancements.assembling: + +Assembling Datetimes +^^^^^^^^^^^^^^^^^^^^ + +``pd.to_datetime()`` has gained the ability to assemble datetimes from a passed-in ``DataFrame`` or a dict (:issue:`8158`). + +..
ipython:: python + + df = pd.DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [2, 3]}) + pd.to_datetime(df) + + # pass only the columns that you need to assemble + pd.to_datetime(df[['year', 'month', 'day']]) + .. _whatsnew_0181.other: Other Enhancements diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 013b7a754a3fd..78f84aa243cd9 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -3144,16 +3144,6 @@ def test_to_datetime_1703(self): result = index.to_datetime() self.assertEqual(result[0], Timestamp('1/1/2012')) - def test_to_datetime_dimensions(self): - # GH 11776 - df = DataFrame({'a': ['1/1/2012', '1/2/2012'], - 'b': ['12/30/2012', '12/31/2012']}) - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_datetime(df) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_datetime(df, errors=errors) - def test_get_loc_msg(self): idx = period_range('2000-1-1', freq='A', periods=10) bad_period = Period('2012', 'A') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index eea8cf934ee7b..dfec081123f01 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2283,6 +2283,131 @@ def _simple_ts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) +class TestToDatetime(tm.TestCase): + _multiprocess_can_split_ = True + + # TODO: move all to_datetime tests not covered in other + # classes here + + def test_dataframe(self): + + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [6, 7], + 'minute': [58, 59], + 'second': [10, 11], + 'ms': [1, 1], + 'us': [2, 2], + 'ns': [3, 3]}) + + result = to_datetime({'year': df['year'], + 'month': df['month'], + 'day': df['day']}) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:0:00')]) + 
assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[['year', 'month', 'day']].to_dict()) + assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[['year', 'month', 'day']].to_dict() + df2['month'] = 2 + result = to_datetime(df2) + expected2 = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160205 00:0:00')]) + assert_series_equal(result, expected2) + + # unit mappings + units = [{'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'HH', + 'minute': 'MM', + 'second': 'SS'}, + {'year': '%Y', + 'month': '%m', + 'day': '%d', + 'hour': '%H', + 'minute': '%M', + 'second': '%S'}, + {'year': 'y', + 'month': 'month', + 'day': 'd', + 'hour': 'h', + 'minute': 'm', + 'second': 's'}, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10'), + Timestamp('20160305 07:59:11')]) + assert_series_equal(result, expected) + + d = {'year': 'y', + 'month': 'month', + 'day': 'd', + 'hour': 'h', + 'minute': 'm', + 'second': 's', + 'ms': 'ms', + 'us': 'us', + 'ns': 'ns'} + + result = to_datetime(df.rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10.001002003'), + Timestamp('20160305 07:59:11.001002003')]) + assert_series_equal(result, expected) + + # coerce back to int + result = to_datetime(df.astype(str), unit=d) + assert_series_equal(result, expected) + + # passing coerce + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + with self.assertRaises(ValueError): + to_datetime(df2) + result = to_datetime(df2, errors='coerce') + expected = Series([Timestamp('20150204 00:00:00'), + pd.NaT]) + assert_series_equal(result, expected) + + # extra columns + with self.assertRaises(ValueError): + df2 = df.copy() + df2['foo'] = 1 + to_datetime(df2) + + # not enough + for c in [['year'], + ['year', 'month'], + ['year', 'month', 'second'], + ['month', 'day'], + ['year', 'day', 'second']]: + with 
self.assertRaises(ValueError): + to_datetime(df[c]) + + # duplicates + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + df2.columns = ['year', 'year', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5], + 'hour': [4, 5]}) + df2.columns = ['year', 'month', 'day', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + class TestDatetimeIndex(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d413a4a2bf096..acd8ec4b9bfe9 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -1,10 +1,11 @@ from datetime import datetime, timedelta, time import numpy as np +from collections import MutableMapping import pandas.lib as lib import pandas.tslib as tslib import pandas.core.common as com -from pandas.core.common import ABCIndexClass +from pandas.core.common import ABCIndexClass, ABCSeries, ABCDataFrame import pandas.compat as compat from pandas.util.decorators import deprecate_kwarg @@ -175,7 +176,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, Parameters ---------- - arg : string, datetime, list, tuple, 1-d array, or Series + arg : string, datetime, list, tuple, 1-d array, Series + + .. versionadded:: 0.18.1 + + or DataFrame/dict-like + errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception @@ -282,6 +288,18 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT + + Assembling a datetime from multiple columns of a DataFrame.
The keys can be + strplike (%Y, %m) or common abbreviations like ('year', 'month') + + >>> df = pd.DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5]}) + >>> pd.to_datetime(df) + 0 2015-02-04 + 1 2016-03-05 + dtype: datetime64[ns] + """ return _to_datetime(arg, errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, @@ -296,7 +314,6 @@ def _to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, Same as to_datetime, but accept freq for DatetimeIndex internal construction """ - from pandas.core.series import Series from pandas.tseries.index import DatetimeIndex def _convert_listlike(arg, box, format, name=None): @@ -407,9 +424,12 @@ def _convert_listlike(arg, box, format, name=None): return arg elif isinstance(arg, tslib.Timestamp): return arg - elif isinstance(arg, Series): + elif isinstance(arg, ABCSeries): + from pandas import Series values = _convert_listlike(arg._values, False, format) return Series(values, index=arg.index, name=arg.name) + elif isinstance(arg, (ABCDataFrame, MutableMapping)): + return _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, box, format, name=arg.name) elif com.is_list_like(arg): @@ -417,6 +437,123 @@ def _convert_listlike(arg, box, format, name=None): return _convert_listlike(np.array([arg]), box, format)[0] +# mappings for assembling units +_unit_map = {'year': 'year', + 'y': 'year', + '%Y': 'year', + 'month': 'month', + 'M': 'month', + '%m': 'month', + 'day': 'day', + 'days': 'day', + 'd': 'day', + '%d': 'day', + 'h': 'h', + 'hour': 'h', + 'hh': 'h', + '%H': 'h', + 'minute': 'm', + 't': 'm', + 'min': 'm', + '%M': 'm', + 'mm': 'm', + 'MM': 'm', + '%M': 'm', + 's': 's', + 'seconds': 's', + 'second': 's', + '%S': 's', + 'ss': 's', + 'ms': 'ms', + 'millisecond': 'ms', + 'milliseconds': 'ms', + 'us': 'us', + 'microsecond': 'us', + 'microseconds': 'us', + 'ns': 'ns', + 'nanosecond': 'ns', + 'nanoseconds': 'ns' + } + + +def 
_assemble_from_unit_mappings(arg, errors): + """ + assemble the unit specified fields from the arg (DataFrame) + Return a Series for actual parsing + + Parameters + ---------- + arg : DataFrame + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as NaT + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + Series + """ + from pandas import to_timedelta, to_numeric, DataFrame + arg = DataFrame(arg) + if not arg.columns.is_unique: + raise ValueError("cannot assemble with duplicate keys") + + # replace passed unit with _unit_map + def f(value): + if value in _unit_map: + return _unit_map[value] + + # m is case significant + if value.lower() in _unit_map and not value.startswith('m'): + return _unit_map[value.lower()] + + return value + + unit = {k: f(k) for k in arg.keys()} + unit_rev = {v: k for k, v in unit.items()} + + # we require at least Ymd + required = ['year', 'month', 'day'] + req = sorted(list(set(required) - set(unit_rev.keys()))) + if len(req): + raise ValueError("to assemble mappings with a dict of " + "units, requires year, month, day: " + "[{0}] is missing".format(','.join(req))) + + # keys we don't recognize + excess = sorted(list(set(unit_rev.keys()) - set(_unit_map.values()))) + if len(excess): + raise ValueError("extra keys have been passed " + "to the datetime assemblage: " + "[{0}]".format(','.join(excess))) + + def coerce(values): + # we allow coercion to numeric if errors allows + return to_numeric(values, errors=errors) + + values = (coerce(arg[unit_rev['year']]) * 10000 + + coerce(arg[unit_rev['month']]) * 100 + + coerce(arg[unit_rev['day']])) + try: + values = to_datetime(values, format='%Y%m%d', errors=errors) + except (TypeError, ValueError) as e: + raise ValueError("cannot assemble the " + "datetimes: {0}".format(e)) + + for u in ['h', 'm', 's', 'ms', 'us', 'ns']: + value = unit_rev.get(u) + if value
is not None and value in arg: + try: + values += to_timedelta(coerce(arg[value]), + unit=u, + errors=errors) + except (TypeError, ValueError) as e: + raise ValueError("cannot assemble the datetimes " + "[{0}]: {1}".format(value, e)) + + return values + def _attempt_YYYYMMDD(arg, errors): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, From e18c9cc7dcbc6e17c843517a05f348e878a68bcc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 25 Apr 2016 17:48:31 -0400 Subject: [PATCH 2/2] TST: move .to_datetime() tests to new testing class --- doc/source/timeseries.rst | 5 +- pandas/tseries/tests/test_timeseries.py | 299 ++++++++++++------------ pandas/tseries/tools.py | 2 +- 3 files changed, 151 insertions(+), 155 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 576d9a52011ca..e8f1404d79c9f 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -195,8 +195,7 @@ or ``format``, use ``to_datetime`` if these are required. .. versionadded:: 0.18.1 -You can also pass a ``DataFrame`` of integer or string columns to assemble into a ``Series`` -of ``Timestamps``. +You can also pass a ``DataFrame`` of integer or string columns to assemble into a ``Series`` of ``Timestamps``. .. ipython:: python @@ -213,8 +212,6 @@ You can pass only the columns that you need to assemble. pd.to_datetime(df[['year', 'month', 'day']]) -.. 
_whatsnew_0181.other: - Invalid Data ~~~~~~~~~~~~ diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index dfec081123f01..4eca3bc6ba3af 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1049,154 +1049,6 @@ def test_to_datetime_list_of_integers(self): self.assertTrue(rng.equals(result)) - def test_to_datetime_dt64s(self): - in_bound_dts = [ - np.datetime64('2000-01-01'), - np.datetime64('2000-01-02'), - ] - - for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) - - oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] - - for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, tslib.Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) - - def test_to_datetime_array_of_dt64s(self): - dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] - - # Assuming all datetimes are in bounds, to_datetime() returns - # an array that is equal to Timestamp() parsing - self.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), - np.array([Timestamp(x).asm8 for x in dts]) - ) - - # A list of datetimes where the last one is out of bounds - dts_with_oob = dts + [np.datetime64('9999-01-01')] - - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') - - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), - np.array( - [ - Timestamp(dts_with_oob[0]).asm8, - Timestamp(dts_with_oob[1]).asm8, - iNaT, - ], - dtype='M8' - ) - ) - - # With errors='ignore', out of bounds datetime64s - # are converted to their .item(), which depending on the version of - # numpy is either a python datetime.datetime or datetime.date - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), - np.array( - [dt.item() for dt in dts_with_oob], - dtype='O' - ) - ) - - def 
test_to_datetime_tz(self): - - # xref 8260 - # uniform returns a DatetimeIndex - arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) - expected = DatetimeIndex( - ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') - tm.assert_index_equal(result, expected) - - # mixed tzs will raise - arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) - - def test_to_datetime_tz_pytz(self): - - # xref 8260 - tm._skip_if_no_pytz() - import pytz - - us_eastern = pytz.timezone('US/Eastern') - arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), - us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) - result = pd.to_datetime(arr, utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - - def test_to_datetime_tz_psycopg2(self): - - # xref 8260 - try: - import psycopg2 - except ImportError: - raise nose.SkipTest("no psycopg2 installed") - - # misc cases - tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) - tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) - arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), - datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], - dtype=object) - - result = pd.to_datetime(arr, errors='coerce', utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - 
'2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - # dtype coercion - i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' - ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertFalse(com.is_datetime64_ns_dtype(i)) - - # tz coerceion - result = pd.to_datetime(i, errors='coerce') - tm.assert_index_equal(result, i) - - result = pd.to_datetime(i, errors='coerce', utc=True) - expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], - dtype='datetime64[ns, UTC]') - tm.assert_index_equal(result, expected) - - def test_index_to_datetime(self): - idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) - - result = idx.to_datetime() - expected = DatetimeIndex(datetools.to_datetime(idx.values)) - self.assertTrue(result.equals(expected)) - - today = datetime.today() - idx = Index([today], dtype=object) - result = idx.to_datetime() - expected = DatetimeIndex([today]) - self.assertTrue(result.equals(expected)) - def test_to_datetime_freq(self): xp = bdate_range('2000-1-1', periods=10, tz='UTC') rs = xp.to_datetime() @@ -2286,8 +2138,153 @@ def _simple_ts(start, end, freq='D'): class TestToDatetime(tm.TestCase): _multiprocess_can_split_ = True - # TODO: move all to_datetime tests not covered in other - # classes here + def test_to_datetime_dt64s(self): + in_bound_dts = [ + np.datetime64('2000-01-01'), + np.datetime64('2000-01-02'), + ] + + for dt in in_bound_dts: + self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + + oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] + + for dt in oob_dts: + self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') + self.assertRaises(ValueError, tslib.Timestamp, dt) + self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + + def test_to_datetime_array_of_dt64s(self): + dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] + + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to 
Timestamp() parsing + self.assert_numpy_array_equal( + pd.to_datetime(dts, box=False), + np.array([Timestamp(x).asm8 for x in dts]) + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64('9999-01-01')] + + self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') + + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + np.array( + [ + Timestamp(dts_with_oob[0]).asm8, + Timestamp(dts_with_oob[1]).asm8, + iNaT, + ], + dtype='M8' + ) + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + np.array( + [dt.item() for dt in dts_with_oob], + dtype='O' + ) + ) + + def test_to_datetime_tz(self): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] + result = pd.to_datetime(arr) + expected = DatetimeIndex( + ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') + tm.assert_index_equal(result, expected) + + # mixed tzs will raise + arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] + self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + + def test_to_datetime_tz_pytz(self): + + # xref 8260 + tm._skip_if_no_pytz() + import pytz + + us_eastern = pytz.timezone('US/Eastern') + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) + result = pd.to_datetime(arr, utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + 
tm.assert_index_equal(result, expected) + + def test_to_datetime_utc_is_true(self): + # See gh-11934 + start = pd.Timestamp('2014-01-01', tz='utc') + end = pd.Timestamp('2014-01-03', tz='utc') + date_range = pd.bdate_range(start, end) + + result = pd.to_datetime(date_range, utc=True) + expected = pd.DatetimeIndex(data=date_range) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_psycopg2(self): + + # xref 8260 + try: + import psycopg2 + except ImportError: + raise nose.SkipTest("no psycopg2 installed") + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], + dtype=object) + + result = pd.to_datetime(arr, errors='coerce', utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = pd.DatetimeIndex([ + '2000-01-01 08:00:00+00:00' + ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) + self.assertFalse(com.is_datetime64_ns_dtype(i)) + + # tz coerceion + result = pd.to_datetime(i, errors='coerce') + tm.assert_index_equal(result, i) + + result = pd.to_datetime(i, errors='coerce', utc=True) + expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], + dtype='datetime64[ns, UTC]') + tm.assert_index_equal(result, expected) + + def test_index_to_datetime(self): + idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) + + result = idx.to_datetime() + expected = DatetimeIndex(datetools.to_datetime(idx.values)) + self.assertTrue(result.equals(expected)) + + today = datetime.today() + idx = Index([today], dtype=object) + result = idx.to_datetime() + expected = DatetimeIndex([today]) + self.assertTrue(result.equals(expected)) def test_dataframe(self): @@ -2408,6 +2405,7 @@ def test_dataframe(self): with 
self.assertRaises(ValueError): to_datetime(df2) + class TestDatetimeIndex(tm.TestCase): _multiprocess_can_split_ = True @@ -4944,6 +4942,7 @@ def test_to_datetime_format_weeks(self): class TestToDatetimeInferFormat(tm.TestCase): + def test_to_datetime_infer_datetime_format_consistent_format(self): time_series = pd.Series(pd.date_range('20000101', periods=50, freq='H')) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index acd8ec4b9bfe9..adad34bb32169 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -290,7 +290,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, Assembling a datetime from multiple columns of a DataFrame. The keys can be - strplike (%Y, %m) or common abbreviations like ('year', 'month') + strptime-like (%Y, %m) or common abbreviations like ('year', 'month') >>> df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3],