Skip to content

Commit 0fc0336

Browse files
mroeschke, Aloqeely, pre-commit-ci[bot]
authored
CLN: Remove deprecated read_csv(delim_whitespace=) (#58668)
* CLN: Remove deprecated read_csv(delim_whitespace=)
* Clarify notes
* Fix some arrow failures
* Update doc/source/whatsnew/v3.0.0.rst
  Co-authored-by: Abdulaziz Aloqeely <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
---------
Co-authored-by: Abdulaziz Aloqeely <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent c1234db commit 0fc0336

14 files changed

+50
-284
lines changed

doc/source/user_guide/io.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,6 @@ Currently, options unsupported by the C and pyarrow engines include:
15111511

15121512
* ``sep`` other than a single character (e.g. regex separators)
15131513
* ``skipfooter``
1514-
* ``sep=None`` with ``delim_whitespace=False``
15151514

15161515
Specifying any of the above options will produce a ``ParserWarning`` unless the
15171516
python engine is selected explicitly using ``engine='python'``.
@@ -1526,7 +1525,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
15261525
* ``memory_map``
15271526
* ``dialect``
15281527
* ``on_bad_lines``
1529-
* ``delim_whitespace``
15301528
* ``quoting``
15311529
* ``lineterminator``
15321530
* ``converters``

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ Removal of prior version deprecations/changes
243243
- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
244244
- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
245245
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
246+
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
246247
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
247248
- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
248249
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)

pandas/errors/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,6 @@ class ParserWarning(Warning):
229229
230230
1. `sep` other than a single character (e.g. regex separators)
231231
2. `skipfooter` higher than 0
232-
3. `sep=None` with `delim_whitespace=False`
233232
234233
The warning can be avoided by adding `engine='python'` as a parameter in
235234
`pd.read_csv` and `pd.read_table` methods.

pandas/io/clipboards.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,8 @@ def read_clipboard(
113113
if index_length != 0:
114114
kwargs.setdefault("index_col", list(range(index_length)))
115115

116-
# Edge case where sep is specified to be None, return to default
117-
if sep is None and kwargs.get("delim_whitespace") is None:
118-
sep = r"\s+"
116+
elif not isinstance(sep, str):
117+
raise ValueError(f"{sep=} must be a string")
119118

120119
# Regex separator currently only works with python engine.
121120
# Default to python if separator is multi-character (regex)

pandas/io/parsers/readers.py

Lines changed: 2 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
133133
encoding_errors: str | None
134134
dialect: str | csv.Dialect | None
135135
on_bad_lines: str
136-
delim_whitespace: bool | lib.NoDefault
137136
low_memory: bool
138137
memory_map: bool
139138
float_precision: Literal["high", "legacy", "round_trip"] | None
@@ -425,14 +424,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
425424
426425
Callable for ``engine='pyarrow'``
427426
428-
delim_whitespace : bool, default False
429-
Specifies whether or not whitespace (e.g. ``' '`` or ``'\\t'``) will be
430-
used as the ``sep`` delimiter. Equivalent to setting ``sep='\\s+'``. If this option
431-
is set to ``True``, nothing should be passed in for the ``delimiter``
432-
parameter.
433-
434-
.. deprecated:: 2.2.0
435-
Use ``sep="\\s+"`` instead.
436427
low_memory : bool, default True
437428
Internally process the file in chunks, resulting in lower memory use
438429
while parsing, but possibly mixed type inference. To ensure no mixed
@@ -558,15 +549,13 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
558549

559550

560551
class _C_Parser_Defaults(TypedDict):
561-
delim_whitespace: Literal[False]
562552
na_filter: Literal[True]
563553
low_memory: Literal[True]
564554
memory_map: Literal[False]
565555
float_precision: None
566556

567557

568558
_c_parser_defaults: _C_Parser_Defaults = {
569-
"delim_whitespace": False,
570559
"na_filter": True,
571560
"low_memory": True,
572561
"memory_map": False,
@@ -592,7 +581,6 @@ class _Fwf_Defaults(TypedDict):
592581
"thousands",
593582
"memory_map",
594583
"dialect",
595-
"delim_whitespace",
596584
"quoting",
597585
"lineterminator",
598586
"converters",
@@ -818,24 +806,12 @@ def read_csv(
818806
# Error Handling
819807
on_bad_lines: str = "error",
820808
# Internal
821-
delim_whitespace: bool | lib.NoDefault = lib.no_default,
822809
low_memory: bool = _c_parser_defaults["low_memory"],
823810
memory_map: bool = False,
824811
float_precision: Literal["high", "legacy", "round_trip"] | None = None,
825812
storage_options: StorageOptions | None = None,
826813
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
827814
) -> DataFrame | TextFileReader:
828-
if delim_whitespace is not lib.no_default:
829-
# GH#55569
830-
warnings.warn(
831-
"The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
832-
"will be removed in a future version. Use ``sep='\\s+'`` instead",
833-
FutureWarning,
834-
stacklevel=find_stack_level(),
835-
)
836-
else:
837-
delim_whitespace = False
838-
839815
# locals() should never be modified
840816
kwds = locals().copy()
841817
del kwds["filepath_or_buffer"]
@@ -844,7 +820,6 @@ def read_csv(
844820
kwds_defaults = _refine_defaults_read(
845821
dialect,
846822
delimiter,
847-
delim_whitespace,
848823
engine,
849824
sep,
850825
on_bad_lines,
@@ -963,24 +938,12 @@ def read_table(
963938
# Error Handling
964939
on_bad_lines: str = "error",
965940
# Internal
966-
delim_whitespace: bool | lib.NoDefault = lib.no_default,
967941
low_memory: bool = _c_parser_defaults["low_memory"],
968942
memory_map: bool = False,
969943
float_precision: Literal["high", "legacy", "round_trip"] | None = None,
970944
storage_options: StorageOptions | None = None,
971945
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
972946
) -> DataFrame | TextFileReader:
973-
if delim_whitespace is not lib.no_default:
974-
# GH#55569
975-
warnings.warn(
976-
"The 'delim_whitespace' keyword in pd.read_table is deprecated and "
977-
"will be removed in a future version. Use ``sep='\\s+'`` instead",
978-
FutureWarning,
979-
stacklevel=find_stack_level(),
980-
)
981-
else:
982-
delim_whitespace = False
983-
984947
# locals() should never be modified
985948
kwds = locals().copy()
986949
del kwds["filepath_or_buffer"]
@@ -989,7 +952,6 @@ def read_table(
989952
kwds_defaults = _refine_defaults_read(
990953
dialect,
991954
delimiter,
992-
delim_whitespace,
993955
engine,
994956
sep,
995957
on_bad_lines,
@@ -1296,17 +1258,10 @@ def _clean_options(
12961258
engine = "python"
12971259

12981260
sep = options["delimiter"]
1299-
delim_whitespace = options["delim_whitespace"]
13001261

1301-
if sep is None and not delim_whitespace:
1302-
if engine in ("c", "pyarrow"):
1303-
fallback_reason = (
1304-
f"the '{engine}' engine does not support "
1305-
"sep=None with delim_whitespace=False"
1306-
)
1307-
engine = "python"
1308-
elif sep is not None and len(sep) > 1:
1262+
if sep is not None and len(sep) > 1:
13091263
if engine == "c" and sep == r"\s+":
1264+
# delim_whitespace passed on to pandas._libs.parsers.TextReader
13101265
result["delim_whitespace"] = True
13111266
del result["delimiter"]
13121267
elif engine not in ("python", "python-fwf"):
@@ -1317,9 +1272,6 @@ def _clean_options(
13171272
r"different from '\s+' are interpreted as regex)"
13181273
)
13191274
engine = "python"
1320-
elif delim_whitespace:
1321-
if "python" in engine:
1322-
result["delimiter"] = r"\s+"
13231275
elif sep is not None:
13241276
encodeable = True
13251277
encoding = sys.getfilesystemencoding() or "utf-8"
@@ -1730,7 +1682,6 @@ def _stringify_na_values(na_values, floatify: bool) -> set[str | float]:
17301682
def _refine_defaults_read(
17311683
dialect: str | csv.Dialect | None,
17321684
delimiter: str | None | lib.NoDefault,
1733-
delim_whitespace: bool,
17341685
engine: CSVEngine | None,
17351686
sep: str | None | lib.NoDefault,
17361687
on_bad_lines: str | Callable,
@@ -1750,14 +1701,6 @@ def _refine_defaults_read(
17501701
documentation for more details.
17511702
delimiter : str or object
17521703
Alias for sep.
1753-
delim_whitespace : bool
1754-
Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
1755-
used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
1756-
is set to True, nothing should be passed in for the ``delimiter``
1757-
parameter.
1758-
1759-
.. deprecated:: 2.2.0
1760-
Use ``sep="\\s+"`` instead.
17611704
engine : {{'c', 'python'}}
17621705
Parser engine to use. The C engine is faster while the python engine is
17631706
currently more feature-complete.
@@ -1777,12 +1720,6 @@ def _refine_defaults_read(
17771720
-------
17781721
kwds : dict
17791722
Input parameters with correct values.
1780-
1781-
Raises
1782-
------
1783-
ValueError :
1784-
If a delimiter was specified with ``sep`` (or ``delimiter``) and
1785-
``delim_whitespace=True``.
17861723
"""
17871724
# fix types for sep, delimiter to Union(str, Any)
17881725
delim_default = defaults["delimiter"]
@@ -1813,12 +1750,6 @@ def _refine_defaults_read(
18131750
if delimiter is None:
18141751
delimiter = sep
18151752

1816-
if delim_whitespace and (delimiter is not lib.no_default):
1817-
raise ValueError(
1818-
"Specified a delimiter with both sep and "
1819-
"delim_whitespace=True; you can only specify one."
1820-
)
1821-
18221753
if delimiter == "\n":
18231754
raise ValueError(
18241755
r"Specified \n as separator or delimiter. This forces the python engine "

0 commit comments

Comments
 (0)