diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index d7feb6e547b22..d09c9a4cd6a3d 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -676,6 +676,7 @@ I/O
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
+- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
-
Plotting
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 4b3fa08e5e4af..08fb0172adcff 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1620,7 +1620,6 @@ def _infer_types(self, values, na_values, try_num_bool=True):
converted : ndarray
na_count : int
"""
-
na_count = 0
if issubclass(values.dtype.type, (np.number, np.bool_)):
mask = algorithms.isin(values, list(na_values))
@@ -1633,20 +1632,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
if try_num_bool:
try:
- result = lib.maybe_convert_numeric(values, na_values, False)
+ result = lib.maybe_convert_numeric(np.asarray(values),
+ na_values, False)
na_count = isna(result).sum()
except Exception:
result = values
if values.dtype == np.object_:
- na_count = parsers.sanitize_objects(result, na_values,
- False)
+ na_count = parsers.sanitize_objects(np.asarray(result),
+ na_values, False)
else:
result = values
if values.dtype == np.object_:
- na_count = parsers.sanitize_objects(values, na_values, False)
+ na_count = parsers.sanitize_objects(np.asarray(values),
+ na_values, False)
if result.dtype == np.object_ and try_num_bool:
- result = libops.maybe_convert_bool(values,
+ result = libops.maybe_convert_bool(np.asarray(values),
true_values=self.true_values,
false_values=self.false_values)
@@ -3033,7 +3034,7 @@ def converter(*date_cols):
return tools.to_datetime(
ensure_object(strs),
utc=None,
- box=False,
+ box=True,
dayfirst=dayfirst,
errors='ignore',
infer_datetime_format=infer_datetime_format
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
index 903439d2d2292..ae3c806ac1c8e 100644
--- a/pandas/tests/io/parser/parse_dates.py
+++ b/pandas/tests/io/parser/parse_dates.py
@@ -13,9 +13,9 @@
from pandas._libs.tslibs import parsing
from pandas._libs.tslib import Timestamp
+import pytz
import pandas as pd
import pandas.io.parsers as parsers
-import pandas.core.tools.datetimes as tools
import pandas.util.testing as tm
import pandas.io.date_converters as conv
@@ -356,21 +356,13 @@ def test_parse_dates_custom_euroformat(self):
def test_parse_tz_aware(self):
# See gh-1693
- import pytz
data = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
# it works
result = self.read_csv(data, index_col=0, parse_dates=True)
stamp = result.index[0]
assert stamp.minute == 39
- try:
- assert result.index.tz is pytz.utc
- except AssertionError:
- arr = result.index.to_pydatetime()
- result = tools.to_datetime(arr, utc=True)[0]
- assert stamp.minute == result.minute
- assert stamp.hour == result.hour
- assert stamp.day == result.day
+ assert result.index.tz is pytz.utc
def test_multiple_date_cols_index(self):
data = """
@@ -674,3 +666,19 @@ def test_parse_date_float(self, data, expected, parse_dates):
# (i.e. float precision should remain unchanged).
result = self.read_csv(StringIO(data), parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
+
+ def test_parse_timezone(self):
+ # gh-22256
+ data = """dt,val
+ 2018-01-04 09:01:00+09:00,23350
+ 2018-01-04 09:02:00+09:00,23400
+ 2018-01-04 09:03:00+09:00,23400
+ 2018-01-04 09:04:00+09:00,23400
+ 2018-01-04 09:05:00+09:00,23400"""
+ parsed = self.read_csv(StringIO(data), parse_dates=['dt'])
+ dti = pd.DatetimeIndex(start='2018-01-04 09:01:00',
+ end='2018-01-04 09:05:00', freq='1min',
+ tz=pytz.FixedOffset(540))
+ expected_data = {'dt': dti, 'val': [23350, 23400, 23400, 23400, 23400]}
+ expected = DataFrame(expected_data)
+ tm.assert_frame_equal(parsed, expected)