Skip to content

BUG: to_csv output formatting for datetimes #30180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3146,6 +3146,7 @@ def to_csv(
line_terminator: Optional[str] = None,
chunksize: Optional[int] = None,
date_format: Optional[str] = None,
round_milliseconds: bool_t = False,
doublequote: bool_t = True,
escapechar: Optional[str] = None,
decimal: Optional[str] = ".",
Expand Down Expand Up @@ -3286,6 +3287,7 @@ def to_csv(
chunksize=chunksize,
quotechar=quotechar,
date_format=date_format,
round_milliseconds=round_milliseconds,
doublequote=doublequote,
escapechar=escapechar,
decimal=decimal,
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2189,7 +2189,13 @@ def _can_hold_element(self, element: Any) -> bool:
return is_valid_nat_for_dtype(element, self.dtype)

def to_native_types(
self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
self,
slicer=None,
na_rep=None,
date_format=None,
quoting=None,
round_milliseconds=None,
**kwargs,
):
""" convert to our native types format, slicing if desired """

Expand All @@ -2210,6 +2216,14 @@ def to_native_types(
format=fmt,
na_rep=na_rep,
).reshape(i8values.shape)

if round_milliseconds:
for i in range(len(result)):
milliseconds = result[i][0].split(".")[1]
can_round = milliseconds == len(milliseconds) * "0"
if can_round:
result[i] = result[i][0].split(".")[0]

return np.atleast_2d(result)

def should_store(self, value):
Expand Down
5 changes: 5 additions & 0 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
chunksize=None,
quotechar='"',
date_format=None,
round_milliseconds=None,
doublequote=True,
escapechar=None,
decimal=".",
Expand Down Expand Up @@ -94,6 +95,7 @@ def __init__(
self.line_terminator = line_terminator or os.linesep

self.date_format = date_format
self.round_milliseconds = round_milliseconds

self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)

Expand All @@ -109,6 +111,7 @@ def __init__(
float_format=float_format,
date_format=date_format,
quoting=self.quoting,
round_milliseconds=round_milliseconds,
)
else:
cols = list(cols)
Expand All @@ -123,6 +126,7 @@ def __init__(
float_format=float_format,
date_format=date_format,
quoting=self.quoting,
round_milliseconds=round_milliseconds,
)
else:
cols = list(cols)
Expand Down Expand Up @@ -342,6 +346,7 @@ def _save_chunk(self, start_i: int, end_i: int):
decimal=self.decimal,
date_format=self.date_format,
quoting=self.quoting,
round_milliseconds=self.round_milliseconds,
)

for col_loc, col in zip(b.mgr_locs, d):
Expand Down
60 changes: 60 additions & 0 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import datetime
from io import StringIO
import os

Expand Down Expand Up @@ -1356,3 +1357,62 @@ def test_gz_lineend(self):
result = f.read().decode("utf-8")

assert result == expected

def test_to_csv_dropna_format(self):
# see gh-29711
date_example_string = "1911180945"
ts = datetime.datetime.strptime(date_example_string, "%y%m%d%H%M%S")
test_json = [{"created_at": "2019-11-18 16:28:42.932887", "foo": "bar",}]

df = pd.DataFrame(test_json)
df["baz"] = ts

e = ",created_at,foo,baz\n0,2019-11-18 16:28:42.932887,bar,2019-11-18 09:04:05\n"
result = df.to_csv()
assert result == e

df["created_at"] = pd.to_datetime(
df["created_at"], infer_datetime_format=True, errors="coerce"
)
result2 = df.to_csv()
assert result2 == e

df = df.dropna(subset=["created_at"])
result3 = df.to_csv()
assert result3 == e

def test_to_csv_round_milliseconds(self):
    """Check ``to_csv(round_milliseconds=True)`` with an explicit ``%f`` format.

    Two frames are written with ``date_format="%Y-%m-%d %H:%M:%S.%f"``.
    In both expected outputs ``baz`` appears without a fractional part,
    while ``created_at`` keeps its non-zero fraction (``.932887`` in the
    first case, ``.000001`` in the second).
    """
    # see gh-29711
    date_format = "%Y-%m-%d %H:%M:%S.%f"

    def build_frame(created_at):
        # One-row frame: created_at converted to datetime, baz set from a
        # parsed stamp, then dropna applied on created_at.
        baz_stamp = datetime.datetime.strptime("1911180945", "%y%m%d%H%M%S")
        frame = pd.DataFrame([{"created_at": created_at, "foo": "bar"}])
        frame["baz"] = baz_stamp
        frame["created_at"] = pd.to_datetime(
            frame["created_at"], infer_datetime_format=True, errors="coerce"
        )
        return frame.dropna(subset=["created_at"])

    # (created_at input, expected CSV text)
    cases = [
        (
            "2019-11-18 16:28:42.932887",
            ",created_at,foo,baz\n0,2019-11-18 16:28:42.932887,bar,2019-11-18 09:04:05\n",
        ),
        (
            "2019-11-18 16:28:42.000001",
            ",created_at,foo,baz\n0,2019-11-18 16:28:42.000001,bar,2019-11-18 09:04:05\n",
        ),
    ]

    for created_at, expected in cases:
        frame = build_frame(created_at)
        written = frame.to_csv(date_format=date_format, round_milliseconds=True)
        assert written == expected