Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
30c9b83
add values.dtype.kind==f branch to array_with_unit_datetime
arw2019 Jun 27, 2020
2f25460
merge with master
arw2019 Jun 29, 2020
572363a
revert pandas/_libs/tslib.pyx
arw2019 Jun 29, 2020
b891030
merge with master
arw2019 Jun 30, 2020
ecd8ce3
merge with master
arw2019 Jun 30, 2020
ee55191
merge with master
arw2019 Jul 2, 2020
292fcdc
merge with master
arw2019 Jul 7, 2020
9e4ac71
Merge remote-tracking branch 'upstream/master'
arw2019 Jul 8, 2020
1d0ba61
merge with master
arw2019 Jul 8, 2020
b59831e
Merge branch 'master' of https://github.com/arw2019/pandas
arw2019 Jul 16, 2020
b954874
Merge remote-tracking branch 'upstream/master'
arw2019 Jul 16, 2020
ac0a7f1
merge with master
arw2019 Jul 16, 2020
bc55716
added line_terminator arg to read_csv
arw2019 Jul 24, 2020
ee69a76
added line_terminator, lineterminator args + tests
arw2019 Jul 24, 2020
4d00fea
merge with master
arw2019 Jul 24, 2020
c015da5
Merge remote-tracking branch 'upstream/master'
arw2019 Jul 24, 2020
73d6d11
fix csv api using kwargs
arw2019 Jul 24, 2020
1a6497f
TST: remove failing test - read_csv takes kwargs now
arw2019 Jul 25, 2020
3a88ef0
add space between kwargs and colon in docstring
arw2019 Jul 25, 2020
7fe8274
DOC: remove the semicolon after kwargs
arw2019 Jul 25, 2020
1c27b2c
added line_terminator arg to read_csv
arw2019 Jul 24, 2020
1912aa2
added line_terminator, lineterminator args + tests
arw2019 Jul 24, 2020
f54df81
fix csv api using kwargs
arw2019 Jul 24, 2020
cea28d8
TST: remove failing test - read_csv takes kwargs now
arw2019 Jul 25, 2020
85ddf44
add space between kwargs and colon in docstring
arw2019 Jul 25, 2020
a28657c
DOC: remove the semicolon after kwargs
arw2019 Jul 25, 2020
2b1333f
Merge branch 'csv-api' of https://github.com/arw2019/pandas into csv-api
arw2019 Jul 27, 2020
0786617
merge with master
arw2019 Aug 21, 2020
5e87bbc
small changes to docstrings
arw2019 Aug 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3004,6 +3004,7 @@ def to_csv(
quoting: Optional[int] = None,
quotechar: str = '"',
line_terminator: Optional[str] = None,
lineterminator: Optional[str] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, this signature is displayed in the documentation so doing it like this could be confusing. (also this function is not kwargs only, so although bonkers, passing arguments as positional arguments would break)

maybe add a **kwargs at the end and document as 'for compatibility with csv module'

then in code something like kwargs.setdefault('lineterminator', line_terminator) and pass on kwargs instead of line_terminator to CSVFormatter which just passes them onto csvlib.writer

Copy link
Member Author

@arw2019 arw2019 Jul 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good! I implemented this idea and added optional kwargs to the docstrings.

In to_csv it's maybe a little awkward

kwargs.setdefault("lineterminator", line_terminator)
line_terminator = line_terminator or kwargs["lineterminator"]

because we have to feed line_terminator to CSVFormatter but also we want to keep line_terminator explicit for the docs. It works but I'm happy to write this another way if these lines look odd

chunksize: Optional[int] = None,
date_format: Optional[str] = None,
doublequote: bool_t = True,
Expand Down Expand Up @@ -3144,7 +3145,7 @@ def to_csv(
formatter = CSVFormatter(
df,
path_or_buf,
line_terminator=line_terminator,
line_terminator=line_terminator or lineterminator,
sep=sep,
encoding=encoding,
errors=errors,
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,7 @@ def read_csv(
compression="infer",
thousands=None,
decimal: str = ".",
line_terminator=None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again, in the signature, I think we should only have the one parameter and the compatibility keyword accepted though **kwargs

lineterminator=None,
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
Expand Down Expand Up @@ -643,7 +644,7 @@ def read_csv(
quotechar=quotechar,
quoting=quoting,
skipinitialspace=skipinitialspace,
lineterminator=lineterminator,
lineterminator=lineterminator or line_terminator, # GH 9568
header=header,
index_col=index_col,
names=names,
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import csv
from io import StringIO
import os
import re

import numpy as np
import pytest
Expand Down Expand Up @@ -998,6 +999,45 @@ def test_to_csv_line_terminators(self):
with open(path, mode="rb") as f:
assert f.read() == expected

def test_to_csv_lineterminator_alternative_args(self):
    # GH 9568
    # Examples adapted from test_to_csv_line_terminators.
    # Verify that the ``line_terminator`` and ``lineterminator`` keyword
    # arguments of ``DataFrame.to_csv`` produce byte-identical output.
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"])

    # case 1: CRLF as line terminator
    with tm.ensure_clean() as path:
        df.to_csv(path, line_terminator="\r\n")
        # read back while the temp file still exists (ensure_clean removes it)
        with open(path, mode="rb") as f:
            res_line_terminator = f.read()

    with tm.ensure_clean() as path:
        df.to_csv(path, lineterminator="\r\n")
        with open(path, mode="rb") as f:
            res_lineterminator = f.read()

    # BUG FIX: previously asserted ``re.match(res_line_terminator,
    # res_lineterminator)`` — re.match treats its first argument as a regex
    # pattern and only anchors at the start, so a spurious prefix match (or a
    # metacharacter in the data) could pass or break the comparison. Exact
    # byte equality is the correct check.
    assert res_line_terminator == res_lineterminator

    # case 2: LF as line terminator
    with tm.ensure_clean() as path:
        df.to_csv(path, line_terminator="\n")
        with open(path, mode="rb") as f:
            res_line_terminator = f.read()

    with tm.ensure_clean() as path:
        df.to_csv(path, lineterminator="\n")
        with open(path, mode="rb") as f:
            res_lineterminator = f.read()

    assert res_line_terminator == res_lineterminator

def test_to_csv_from_csv_categorical(self):

# CSV with categoricals should result in the same output
Expand Down
29 changes: 27 additions & 2 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import io
import os
import re
import sys

import numpy as np
Expand Down Expand Up @@ -330,10 +331,15 @@ def test_to_csv_multi_index(self):
@pytest.mark.parametrize("klass", [pd.DataFrame, pd.Series])
def test_to_csv_single_level_multi_index(self, ind, expected, klass):
    # see gh-19589
    # GH 9568: ``line_terminator`` and ``lineterminator`` must produce
    # identical CSV output.
    result_line_terminator = klass(pd.Series([1], ind, name="data")).to_csv(
        line_terminator="\n", header=True
    )
    result_lineterminator = klass(pd.Series([1], ind, name="data")).to_csv(
        lineterminator="\n", header=True
    )
    # BUG FIX: the previous asserts used ``re.match``, which interprets the
    # left operand as a regex pattern and accepts mere prefix matches; this
    # test needs exact string equality (as the pre-change ``result ==
    # expected`` assertion had).
    assert result_lineterminator == result_line_terminator
    assert result_line_terminator == expected

def test_to_csv_string_array_ascii(self):
# GH 10813
Expand Down Expand Up @@ -436,6 +442,25 @@ def test_to_csv_string_with_crlf(self):
with open(path, "rb") as f:
assert f.read() == expected_crlf

def test_to_csv_string_line_terminator_alternative_args(self):
    # GH 9568
    # Verify that ``line_terminator`` and ``lineterminator`` keyword args
    # write byte-identical files, including for values containing embedded
    # newlines (which to_csv must quote).
    data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
    df = pd.DataFrame(data)

    with tm.ensure_clean("crlf_test.csv") as path:
        df.to_csv(path, line_terminator="\n", index=False)
        # read back while the temp file still exists (ensure_clean removes it)
        with open(path, "rb") as f:
            res_line_terminator = f.read()

    with tm.ensure_clean("crlf_test.csv") as path:
        df.to_csv(path, lineterminator="\n", index=False)
        with open(path, "rb") as f:
            res_lineterminator = f.read()

    # BUG FIX: previously used ``re.match`` for the comparison — re.match
    # treats the first argument as a regex pattern and only requires a match
    # at the start of the string, so it is not an equality check. Exact byte
    # equality is the correct assertion.
    assert res_line_terminator == res_lineterminator

def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = pd.DataFrame(
Expand Down