From c7858f61fcd1062ab2333f286ccec142b9c916e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Israel=20Saeta=20P=C3=A9rez?= Date: Sun, 3 Apr 2016 15:23:22 +0200 Subject: [PATCH] DOC: Clarify when csv separator is being parsed as regex. Resolves #10208. --- doc/source/io.rst | 7 ++++--- pandas/io/parsers.py | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index d606e919e4292..a78222dd748ad 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -91,9 +91,10 @@ filepath_or_buffer : various :class:`~python:io.StringIO`). sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` Delimiter to use. If sep is ``None``, - will try to automatically determine this. Regular expressions are accepted, - use of a regular expression will force use of the python parsing engine and - will ignore quotes in the data. + will try to automatically determine this. Separators longer than 1 character + and different from ``'\s+'`` will be interpreted as regular expressions, will + force use of the python parsing engine and will ignore quotes in the data. + Regex example: ``'\\r\\t'``. delimiter : str, default ``None`` Alternative argument name for sep. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e99fc3db606af..7bd8a593661c5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -221,8 +221,9 @@ class ParserWarning(Warning): _sep_doc = """sep : str, default {default} Delimiter to use. If sep is None, will try to automatically determine - this. Regular expressions are accepted and will force use of the python - parsing engine and will ignore quotes in the data.""" + this. Separators longer than 1 character and different from '\s+' will be + interpreted as regular expressions, will force use of the python parsing + engine and will ignore quotes in the data. 
Regex example: '\\r\\t'""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame @@ -674,7 +675,9 @@ def _clean_options(self, options, engine): elif engine not in ('python', 'python-fwf'): # wait until regex engine integrated fallback_reason = "the 'c' engine does not support"\ - " regex separators" + " regex separators (separators > 1 char and"\ + " different from '\\s+' are"\ + " interpreted as regex)" engine = 'python' if fallback_reason and engine_specified: