Skip to content

Commit b4f6929

Browse files
committed
ENH: Add encoding_errors option in pandas.DataFrame.to_csv (#27750)
encoding_errors : str, default 'strict' Behavior when the input string can’t be converted according to the encoding’s rules (strict, ignore, replace, etc.) See: https://docs.python.org/3/library/codecs.html#codec-base-classes
1 parent f8a924b commit b4f6929

File tree

6 files changed

+488
-3
lines changed

6 files changed

+488
-3
lines changed

doc/source/user_guide/io.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,8 @@ function takes a number of arguments. Only the first is required.
17101710
appropriate (default None)
17111711
* ``chunksize``: Number of rows to write at a time
17121712
* ``date_format``: Format string for datetime objects
1713+
* ``encoding_errors``: Behavior when the input string can’t be converted according to the encoding’s rules (``'strict'``, ``'ignore'``, ``'replace'``, etc.; default ``'strict'``)
1714+
.. versionadded:: 1.0.0
17131715

17141716
Writing a formatted string
17151717
++++++++++++++++++++++++++

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ including other versions of pandas.
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24-
-
24+
- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` option to control how encoding errors are handled (:issue:`27750`).
2525
-
2626

2727
.. _whatsnew_1000.enhancements.other:

pandas/core/generic.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3084,6 +3084,7 @@ def to_csv(
30843084
doublequote: bool_t = True,
30853085
escapechar: Optional[str] = None,
30863086
decimal: Optional[str] = ".",
3087+
encoding_errors: Optional[str] = "strict",
30873088
) -> Optional[str]:
30883089
r"""
30893090
Write object to a comma-separated values (csv) file.
@@ -3171,6 +3172,11 @@ def to_csv(
31713172
decimal : str, default '.'
31723173
Character recognized as decimal separator. E.g. use ',' for
31733174
European data.
3175+
encoding_errors : str, default 'strict'
3176+
Behavior when the input string can’t be converted according to
3177+
the encoding’s rules (strict, ignore, replace, etc.)
3178+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
3179+
.. versionadded:: 1.0.0
31743180
31753181
Returns
31763182
-------
@@ -3224,6 +3230,7 @@ def to_csv(
32243230
doublequote=doublequote,
32253231
escapechar=escapechar,
32263232
decimal=decimal,
3233+
encoding_errors=encoding_errors,
32273234
)
32283235
formatter.save()
32293236

pandas/io/common.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ def _get_handle(
363363
compression: Optional[Union[str, Dict[str, Any]]] = None,
364364
memory_map: bool = False,
365365
is_text: bool = True,
366+
encoding_errors: Optional[str] = "strict",
366367
):
367368
"""
368369
Get file handle for given path/buffer and mode.
@@ -395,6 +396,11 @@ def _get_handle(
395396
is_text : boolean, default True
396397
whether file/buffer is in text format (csv, json, etc.), or in binary
397398
mode (pickle, etc.).
399+
encoding_errors : str, default 'strict'
400+
Behavior when the input string can’t be converted according to
401+
the encoding’s rules (strict, ignore, replace, etc.)
402+
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
403+
.. versionadded:: 1.0.0
398404
399405
Returns
400406
-------
@@ -472,10 +478,12 @@ def _get_handle(
472478
elif is_path:
473479
if encoding:
474480
# Encoding
475-
f = open(path_or_buf, mode, encoding=encoding, newline="")
481+
f = open(
482+
path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline=""
483+
)
476484
elif is_text:
477485
# No explicit encoding
478-
f = open(path_or_buf, mode, errors="replace", newline="")
486+
f = open(path_or_buf, mode, errors=encoding_errors, newline="")
479487
else:
480488
# Binary mode
481489
f = open(path_or_buf, mode)

pandas/io/formats/csvs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(
5252
doublequote=True,
5353
escapechar=None,
5454
decimal=".",
55+
encoding_errors="strict",
5556
):
5657

5758
self.obj = obj
@@ -97,6 +98,8 @@ def __init__(
9798

9899
self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)
99100

101+
self.encoding_errors = encoding_errors
102+
100103
# validate mi options
101104
if self.has_mi_columns:
102105
if cols is not None:
@@ -183,6 +186,7 @@ def save(self):
183186
self.mode,
184187
encoding=self.encoding,
185188
compression=dict(self.compression_args, method=self.compression),
189+
encoding_errors=self.encoding_errors,
186190
)
187191
close = True
188192

0 commit comments

Comments
 (0)