Skip to content

Commit afb1bee

Browse files
committed
Merge remote-tracking branch 'upstream/master' into 24986-nested-array
2 parents 86948a1 + 145ade2 commit afb1bee

19 files changed

+316
-134
lines changed

doc/source/user_guide/io.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,36 @@ a single date rather than the entire array.
989989
990990
os.remove('tmp.csv')
991991
992+
993+
.. _io.csv.mixed_timezones:
994+
995+
Parsing a CSV with mixed timezones
996+
++++++++++++++++++++++++++++++++++
997+
998+
Pandas cannot natively represent a column or index with mixed timezones. If your CSV
999+
file contains columns with a mixture of timezones, the default result will be
1000+
an object-dtype column with strings, even with ``parse_dates``.
1001+
1002+
1003+
.. ipython:: python
1004+
1005+
content = """\
1006+
a
1007+
2000-01-01T00:00:00+05:00
1008+
2000-01-01T00:00:00+06:00"""
1009+
df = pd.read_csv(StringIO(content), parse_dates=['a'])
1010+
df['a']
1011+
1012+
To parse the mixed-timezone values as a datetime column, pass a partially-applied
1013+
:func:`to_datetime` with ``utc=True`` as the ``date_parser``.
1014+
1015+
.. ipython:: python
1016+
1017+
df = pd.read_csv(StringIO(content), parse_dates=['a'],
1018+
date_parser=lambda col: pd.to_datetime(col, utc=True))
1019+
df['a']
1020+
1021+
9921022
.. _io.dayfirst:
9931023

9941024

doc/source/whatsnew/v0.24.0.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,52 @@ that the dates have been converted to UTC
648648
pd.to_datetime(["2015-11-18 15:30:00+05:30",
649649
"2015-11-18 16:30:00+06:30"], utc=True)
650650
651+
652+
.. _whatsnew_0240.api_breaking.read_csv_mixed_tz:
653+
654+
Parsing mixed timezones with :func:`read_csv`
655+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
656+
657+
:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`).
658+
659+
*Previous Behavior*
660+
661+
.. code-block:: python
662+
663+
>>> import io
664+
>>> content = """\
665+
... a
666+
... 2000-01-01T00:00:00+05:00
667+
... 2000-01-01T00:00:00+06:00"""
668+
>>> df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
669+
>>> df.a
670+
0 1999-12-31 19:00:00
671+
1 1999-12-31 18:00:00
672+
Name: a, dtype: datetime64[ns]
673+
674+
*New Behavior*
675+
676+
.. ipython:: python
677+
678+
import io
679+
content = """\
680+
a
681+
2000-01-01T00:00:00+05:00
682+
2000-01-01T00:00:00+06:00"""
683+
df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
684+
df.a
685+
686+
As can be seen, the ``dtype`` is object; each value in the column is a string.
687+
To convert the strings to an array of datetimes, use the ``date_parser`` argument:
688+
689+
.. ipython:: python
690+
691+
df = pd.read_csv(io.StringIO(content), parse_dates=['a'],
692+
date_parser=lambda col: pd.to_datetime(col, utc=True))
693+
df.a
694+
695+
See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more.
696+
651697
.. _whatsnew_0240.api_breaking.period_end_time:
652698

653699
Time values in ``dt.end_time`` and ``to_timestamp(how='end')``

doc/source/whatsnew/v0.24.1.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ Bug Fixes
7474

7575
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
7676

77+
**Visualization**
78+
79+
- Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`).
80+
81+
7782
**Other**
7883

7984
-

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2058,7 +2058,7 @@ def validate_tz_from_dtype(dtype, tz):
20582058
# tz-naive dtype (i.e. datetime64[ns])
20592059
if tz is not None and not timezones.tz_compare(tz, dtz):
20602060
raise ValueError("cannot supply both a tz and a "
2061-
"timezone-naive dtype (i.e. datetime64[ns]")
2061+
"timezone-naive dtype (i.e. datetime64[ns])")
20622062

20632063
return tz
20642064

pandas/io/parsers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,14 @@
203203
* dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
204204
result 'foo'
205205
206-
If a column or index contains an unparseable date, the entire column or
207-
index will be returned unaltered as an object data type. For non-standard
208-
datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``
206+
If a column or index cannot be represented as an array of datetimes,
207+
say because of an unparseable value or a mixture of timezones, the column
208+
or index will be returned unaltered as an object data type. For
209+
non-standard datetime parsing, use ``pd.to_datetime`` after
210+
``pd.read_csv``. To parse an index or column with a mixture of timezones,
211+
specify ``date_parser`` to be a partially-applied
212+
:func:`pandas.to_datetime` with ``utc=True``. See
213+
:ref:`io.csv.mixed_timezones` for more.
209214
210215
Note: A fast-path exists for iso8601-formatted dates.
211216
infer_datetime_format : bool, default False

pandas/plotting/_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
else:
4040
_HAS_MPL = True
4141
if get_option('plotting.matplotlib.register_converters'):
42-
_converter.register(explicit=True)
42+
_converter.register(explicit=False)
4343

4444

4545
def _raise_if_no_mpl():

pandas/tests/indexes/common.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,12 @@ def setup_indices(self):
3030

3131
def test_pickle_compat_construction(self):
3232
# need an object to create with
33-
pytest.raises(TypeError, self._holder)
33+
msg = (r"Index\(\.\.\.\) must be called with a collection of some"
34+
r" kind, None was passed|"
35+
r"__new__\(\) missing 1 required positional argument: 'data'|"
36+
r"__new__\(\) takes at least 2 arguments \(1 given\)")
37+
with pytest.raises(TypeError, match=msg):
38+
self._holder()
3439

3540
def test_to_series(self):
3641
# assert that we are creating a copy of the index
@@ -84,8 +89,11 @@ def test_shift(self):
8489

8590
# GH8083 test the base class for shift
8691
idx = self.create_index()
87-
pytest.raises(NotImplementedError, idx.shift, 1)
88-
pytest.raises(NotImplementedError, idx.shift, 1, 2)
92+
msg = "Not supported for type {}".format(type(idx).__name__)
93+
with pytest.raises(NotImplementedError, match=msg):
94+
idx.shift(1)
95+
with pytest.raises(NotImplementedError, match=msg):
96+
idx.shift(1, 2)
8997

9098
def test_create_index_existing_name(self):
9199

pandas/tests/indexes/datetimes/test_construction.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,10 @@ def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
135135
tm.assert_index_equal(i2, expected)
136136

137137
# incompat tz/dtype
138-
pytest.raises(ValueError, lambda: DatetimeIndex(
139-
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))
138+
msg = "cannot supply both a tz and a dtype with a tz"
139+
with pytest.raises(ValueError, match=msg):
140+
DatetimeIndex(i.tz_localize(None).asi8,
141+
dtype=i.dtype, tz='US/Pacific')
140142

141143
def test_construction_index_with_mixed_timezones(self):
142144
# gh-11488: no tz results in DatetimeIndex
@@ -439,14 +441,19 @@ def test_constructor_coverage(self):
439441
tm.assert_index_equal(from_ints, expected)
440442

441443
# non-conforming
442-
pytest.raises(ValueError, DatetimeIndex,
443-
['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')
444+
msg = ("Inferred frequency None from passed values does not conform"
445+
" to passed frequency D")
446+
with pytest.raises(ValueError, match=msg):
447+
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')
444448

445-
pytest.raises(ValueError, date_range, start='2011-01-01',
446-
freq='b')
447-
pytest.raises(ValueError, date_range, end='2011-01-01',
448-
freq='B')
449-
pytest.raises(ValueError, date_range, periods=10, freq='D')
449+
msg = ("Of the four parameters: start, end, periods, and freq, exactly"
450+
" three must be specified")
451+
with pytest.raises(ValueError, match=msg):
452+
date_range(start='2011-01-01', freq='b')
453+
with pytest.raises(ValueError, match=msg):
454+
date_range(end='2011-01-01', freq='B')
455+
with pytest.raises(ValueError, match=msg):
456+
date_range(periods=10, freq='D')
450457

451458
@pytest.mark.parametrize('freq', ['AS', 'W-SUN'])
452459
def test_constructor_datetime64_tzformat(self, freq):
@@ -511,18 +518,20 @@ def test_constructor_dtype(self):
511518
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
512519
dtype='datetime64[ns, US/Eastern]')
513520

514-
pytest.raises(ValueError,
515-
lambda: DatetimeIndex(idx,
516-
dtype='datetime64[ns]'))
521+
msg = ("cannot supply both a tz and a timezone-naive dtype"
522+
r" \(i\.e\. datetime64\[ns\]\)")
523+
with pytest.raises(ValueError, match=msg):
524+
DatetimeIndex(idx, dtype='datetime64[ns]')
517525

518526
# this is effectively trying to convert tz's
519-
pytest.raises(TypeError,
520-
lambda: DatetimeIndex(idx,
521-
dtype='datetime64[ns, CET]'))
522-
pytest.raises(ValueError,
523-
lambda: DatetimeIndex(
524-
idx, tz='CET',
525-
dtype='datetime64[ns, US/Eastern]'))
527+
msg = ("data is already tz-aware US/Eastern, unable to set specified"
528+
" tz: CET")
529+
with pytest.raises(TypeError, match=msg):
530+
DatetimeIndex(idx, dtype='datetime64[ns, CET]')
531+
msg = "cannot supply both a tz and a dtype with a tz"
532+
with pytest.raises(ValueError, match=msg):
533+
DatetimeIndex(idx, tz='CET', dtype='datetime64[ns, US/Eastern]')
534+
526535
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
527536
tm.assert_index_equal(idx, result)
528537

@@ -732,7 +741,9 @@ def test_from_freq_recreate_from_data(self, freq):
732741

733742
def test_datetimeindex_constructor_misc(self):
734743
arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04']
735-
pytest.raises(Exception, DatetimeIndex, arr)
744+
msg = r"(\(u?')?Unknown string format(:', 'Jn 3, 2005'\))?"
745+
with pytest.raises(ValueError, match=msg):
746+
DatetimeIndex(arr)
736747

737748
arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']
738749
idx1 = DatetimeIndex(arr)

pandas/tests/indexes/datetimes/test_date_range.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,10 @@ def test_compat_replace(self, f):
346346
def test_catch_infinite_loop(self):
347347
offset = offsets.DateOffset(minute=5)
348348
# blow up, don't loop forever
349-
pytest.raises(Exception, date_range, datetime(2011, 11, 11),
350-
datetime(2011, 11, 12), freq=offset)
349+
msg = "Offset <DateOffset: minute=5> did not increment date"
350+
with pytest.raises(ValueError, match=msg):
351+
date_range(datetime(2011, 11, 11), datetime(2011, 11, 12),
352+
freq=offset)
351353

352354
@pytest.mark.parametrize('periods', (1, 2))
353355
def test_wom_len(self, periods):

pandas/tests/indexes/datetimes/test_misc.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,9 @@ def test_datetimeindex_accessors(self):
190190
# Ensure is_start/end accessors throw ValueError for CustomBusinessDay.
191191
bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
192192
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
193-
pytest.raises(ValueError, lambda: dti.is_month_start)
193+
msg = "Custom business days is not supported by is_month_start"
194+
with pytest.raises(ValueError, match=msg):
195+
dti.is_month_start
194196

195197
dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
196198

0 commit comments

Comments
 (0)