From 4222fd5a8bd25f8dff1eceea23d2a1aaf846867e Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 26 Mar 2019 14:46:07 +0300 Subject: [PATCH 01/32] Add new benchmarks for parsing datetime strings --- asv_bench/benchmarks/io/csv.py | 18 ++++++++++++++++++ asv_bench/benchmarks/io/parsers.py | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 asv_bench/benchmarks/io/parsers.py diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 88c2a6f997a5e..5c727bc001e14 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -251,4 +251,22 @@ def mem_parser_chunks(self): pass +class ReadCSVParseSpecialDate(StringIORewind): + params = (['mY', 'mdY'],) + params_name = ['value'] + objects = { + 'mY': '01-2019\n10-2019\n02/2000\n', + 'mdY': '12/02/2010\n' + } + + def setup(self, value): + count_elem = 10000 + data = self.objects[value] * count_elem + self.StringIO_input = StringIO(data) + + def time_read_special_date(self, value): + read_csv(self.data(self.StringIO_input), sep=',', header=None, + names=['Date'], parse_dates=['Date']) + + from ..pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py new file mode 100644 index 0000000000000..ec0a45fd10600 --- /dev/null +++ b/asv_bench/benchmarks/io/parsers.py @@ -0,0 +1,20 @@ +import numpy as np + +from pandas._libs.tslib import array_to_datetime + +class ParseDateString(object): + params = (['mY', 'mdY', 'mQY', 'hm'],) + params_name = ['value'] + objects = { + 'mY': ['01-2019', '1-2019'], + 'mdY': ['12/02/2010'], + 'mQY': ['1Q09', '1Q2000', '09Q1', '2000Q1'], + 'hm': ['21:34'] + } + + def setup(self, value): + count_elem = 100000 + self.data = np.array(self.objects[value] * count_elem, dtype=np.object) + + def time_parse_datestring(self, value): + array_to_datetime(self.data) From 78254a4e82fb326af29138f93a726d5251ef4f44 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Thu, 28 Mar 2019 18:30:51 +0300 Subject: [PATCH 02/32] Implement parsing dd/mm/yyyy and mm/dd/yyyy in Cython --- pandas/_libs/src/headers/portable.h | 1 + pandas/_libs/tslibs/parsing.pyx | 75 ++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/headers/portable.h b/pandas/_libs/src/headers/portable.h index 1976addace3f3..cb8e5ba8138eb 100644 --- a/pandas/_libs/src/headers/portable.h +++ b/pandas/_libs/src/headers/portable.h @@ -8,6 +8,7 @@ // GH-23516 - works around locale perf issues // from MUSL libc, MIT Licensed - see LICENSES #define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u) +#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default) #define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5)) #define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c)) #define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c)) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 6fd4379d953d5..ff59c638400c1 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -6,8 +6,10 @@ import re import time from io import StringIO -from cpython.datetime cimport datetime +from libc.string cimport strchr +from cpython.datetime cimport datetime, PyDateTime_IMPORT, PyDateTimeAPI +PyDateTime_IMPORT import numpy as np @@ -24,6 +26,10 @@ from pandas._config import get_option from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS from pandas._libs.tslibs.nattype import nat_strings, NaT +from pandas._libs.tslibs.util cimport get_c_string_buf_and_size + +cdef extern from "../src/headers/portable.h": + int getdigit_ascii(char c, int default) nogil # ---------------------------------------------------------------------- # Constants @@ -35,6 +41,7 @@ class DateParseError(ValueError): _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) +_DEFAULT_TZINFO = _DEFAULT_DATETIME.tzinfo cdef: object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') @@ -42,6 +49,68 @@ cdef: set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} # ---------------------------------------------------------------------- +cdef char delimiters[5] +delimiters[:] = [b' ', b'/', b'-', b'\\', b'\0'] + +cdef int MAX_DAYS_IN_MONTH = 31 +cdef int MAX_MONTH = 12 + +cdef inline int _parse_2digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 10 + result += getdigit_ascii(s[1], -100) * 1 + return result + +cdef inline int _parse_4digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 1000 + result += getdigit_ascii(s[1], -100) * 100 + result += getdigit_ascii(s[2], -1000) * 10 + result += getdigit_ascii(s[3], -10000) * 1 + return result + +cdef object parse_slashed_date(object date_string, bint dayfirst, + object tzinfo): + cdef: + const char* buf + Py_ssize_t length + int day, month, year + int part1, part2 + + buf = get_c_string_buf_and_size(date_string, &length) + if length != 10 or strchr(delimiters, buf[2]) == NULL \ + or strchr(delimiters, buf[5]) == NULL: + return None + + part1 = _parse_2digit(buf) + part2 = _parse_2digit(buf + 3) + year = _parse_4digit(buf + 6) + if part1 < 0 or part2 < 0 or year < 0: + # some part is not an integer, so it's not a dd/mm/yyyy date + return None + + if part1 < 1 or part2 < 1 or \ + part1 > MAX_DAYS_IN_MONTH or part2 > MAX_DAYS_IN_MONTH or \ + (part1 > MAX_MONTH and part2 > MAX_MONTH): + raise DateParseError("Invalid date specified (%d/%d)" % + (part1, part2)) + + if part1 > MAX_MONTH: + day = part1 + month = part2 + elif part2 > MAX_MONTH: + day = part2 + month = part1 + elif dayfirst: + day = part1 + month = part2 + else: + day = part2 + month = part1 + + return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, + 0, 0, 0, 0, tzinfo, + PyDateTimeAPI.DateTimeType) def parse_datetime_string(date_string, freq=None, dayfirst=False, @@ -66,6 +135,10 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=yearfirst, **kwargs) return dt + dt = parse_slashed_date(date_string, dayfirst, _DEFAULT_TZINFO) + if dt is not None: + return dt + try: dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) return dt From 160809029b02a5d5292963c2e03c69f28e6a0929 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Mar 2019 12:03:44 +0300 Subject: [PATCH 03/32] fix code style --- asv_bench/benchmarks/io/parsers.py | 7 ++++--- pandas/_libs/tslibs/parsing.pyx | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py index ec0a45fd10600..35df41eba9ce6 100644 --- a/asv_bench/benchmarks/io/parsers.py +++ b/asv_bench/benchmarks/io/parsers.py @@ -2,14 +2,15 @@ from pandas._libs.tslib import array_to_datetime + class ParseDateString(object): params = (['mY', 'mdY', 'mQY', 'hm'],) params_name = ['value'] objects = { - 'mY': ['01-2019', '1-2019'], + 'hm': ['21:34'], + 'mY': ['01-2019', '1-2019'], 'mdY': ['12/02/2010'], - 'mQY': ['1Q09', '1Q2000', '09Q1', '2000Q1'], - 'hm': ['21:34'] + 'mQY': ['1Q09', '1Q2000', '09Q1', '2000Q1'] } def setup(self, value): diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ff59c638400c1..620fab13f51dd 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -60,7 +60,7 @@ cdef inline int _parse_2digit(const char* s): result += getdigit_ascii(s[0], -10) * 10 result += getdigit_ascii(s[1], -100) * 1 return result - + cdef inline int _parse_4digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 1000 @@ -92,7 +92,7 @@ cdef object parse_slashed_date(object date_string, bint dayfirst, if part1 < 1 or part2 < 1 or \ part1 > MAX_DAYS_IN_MONTH or part2 > MAX_DAYS_IN_MONTH or \ (part1 > MAX_MONTH and part2 > MAX_MONTH): - raise DateParseError("Invalid date specified (%d/%d)" % + raise DateParseError("Invalid date specified (%d/%d)" % (part1, part2)) if part1 > MAX_MONTH: @@ -108,7 +108,7 @@ cdef object parse_slashed_date(object date_string, bint dayfirst, day = part2 month = part1 - return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, + return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType) From eec3beb7617aeaf791e296b43765d7106a975a17 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Mar 2019 13:12:03 +0300 Subject: [PATCH 04/32] using DEF statement for compile-time constant --- pandas/_libs/tslibs/parsing.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 620fab13f51dd..81f0c431578f3 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -49,11 +49,9 @@ cdef: set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} # ---------------------------------------------------------------------- -cdef char delimiters[5] -delimiters[:] = [b' ', b'/', b'-', b'\\', b'\0'] - -cdef int MAX_DAYS_IN_MONTH = 31 -cdef int MAX_MONTH = 12 +DEF delimiters = b' /-\\' +DEF MAX_DAYS_IN_MONTH = 31 +DEF MAX_MONTH = 12 cdef inline int _parse_2digit(const char* s): cdef int result = 0 From d322b8d98d16f8507c52f3035dda691a7227d6d0 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Mar 2019 15:03:29 +0300 Subject: [PATCH 05/32] parse_slashed_date simplification --- pandas/_libs/tslibs/parsing.pyx | 39 +++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 81f0c431578f3..b1203d5bf89e6 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -73,42 +73,29 @@ cdef object parse_slashed_date(object date_string, bint dayfirst, const char* buf Py_ssize_t length int day, month, year - int part1, part2 buf = get_c_string_buf_and_size(date_string, &length) if length != 10 or strchr(delimiters, buf[2]) == NULL \ or strchr(delimiters, buf[5]) == NULL: return None - part1 = _parse_2digit(buf) - part2 = _parse_2digit(buf + 3) + month = _parse_2digit(buf) + day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) - if part1 < 0 or part2 < 0 or year < 0: - # some part is not an integer, so it's not a dd/mm/yyyy date + if month < 0 or day < 0 or year < 0: + # some part is not an integer, so it's not a mm/dd/yyyy date return None - if part1 < 1 or part2 < 1 or \ - part1 > MAX_DAYS_IN_MONTH or part2 > MAX_DAYS_IN_MONTH or \ - (part1 > MAX_MONTH and part2 > MAX_MONTH): - raise DateParseError("Invalid date specified (%d/%d)" % - (part1, part2)) - - if part1 > MAX_MONTH: - day = part1 - month = part2 - elif part2 > MAX_MONTH: - day = part2 - month = part1 - elif dayfirst: - day = part1 - month = part2 - else: - day = part2 - month = part1 + if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ + and (month <= MAX_MONTH or day <= MAX_MONTH): + if month > MAX_MONTH or (day < MAX_MONTH and dayfirst): + day, month = month, day + return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, + 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType) + + raise DateParseError("Invalid date specified (%d/%d)" % + (month, day)) - return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, - 0, 0, 0, 0, tzinfo, - PyDateTimeAPI.DateTimeType) def parse_datetime_string(date_string, freq=None, dayfirst=False, From 0546e0a9a7f372e421080093745ab21fccb3b8be Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Mar 2019 15:14:19 +0300 Subject: [PATCH 06/32] removed micro-bench --- asv_bench/benchmarks/io/parsers.py | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 asv_bench/benchmarks/io/parsers.py diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py deleted file mode 100644 index 35df41eba9ce6..0000000000000 --- a/asv_bench/benchmarks/io/parsers.py +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np - -from pandas._libs.tslib import array_to_datetime - - -class ParseDateString(object): - params = (['mY', 'mdY', 'mQY', 'hm'],) - params_name = ['value'] - objects = { - 'hm': ['21:34'], - 'mY': ['01-2019', '1-2019'], - 'mdY': ['12/02/2010'], - 'mQY': ['1Q09', '1Q2000', '09Q1', '2000Q1'] - } - - def setup(self, value): - count_elem = 100000 - self.data = np.array(self.objects[value] * count_elem, dtype=np.object) - - def time_parse_datestring(self, value): - array_to_datetime(self.data) From 4a673ff1c70079ec6c23e308413e649c7f2f35b9 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Fri, 29 Mar 2019 07:35:04 -0500 Subject: [PATCH 07/32] Support mm-yyyy along with mm-dd-yyyy --- pandas/_libs/tslibs/parsing.pyx | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index b1203d5bf89e6..4ee0dc1e73545 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -53,6 +53,9 @@ DEF delimiters = b' /-\\' DEF MAX_DAYS_IN_MONTH = 31 DEF MAX_MONTH = 12 +cdef bint _is_not_delimiter(const char ch): + return strchr(delimiters, ch) == NULL + cdef inline int _parse_2digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 10 @@ -75,13 +78,21 @@ cdef object parse_slashed_date(object date_string, bint dayfirst, int day, month, year buf = get_c_string_buf_and_size(date_string, &length) - if length != 10 or strchr(delimiters, buf[2]) == NULL \ - or strchr(delimiters, buf[5]) == NULL: + if length == 10: + if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): + return None + month = _parse_2digit(buf) + day = _parse_2digit(buf + 3) + year = _parse_4digit(buf + 6) + elif length == 7: + if _is_not_delimiter(buf[2]): + return None + month = _parse_2digit(buf) + day = 1 + year = _parse_4digit(buf + 3) + else: return None - month = _parse_2digit(buf) - day = _parse_2digit(buf + 3) - year = _parse_4digit(buf + 6) if month < 0 or day < 0 or year < 0: # some part is not an integer, so it's not a mm/dd/yyyy date return None From 23df42604f3ddf054b22b2dca1b5e6afe34ea2e1 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Fri, 29 Mar 2019 07:43:32 -0500 Subject: [PATCH 08/32] Rename parse_slashed_date to parse_delimited_date --- pandas/_libs/tslibs/parsing.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 4ee0dc1e73545..68a9f6f433386 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -70,13 +70,14 @@ cdef inline int _parse_4digit(const char* s): result += getdigit_ascii(s[3], -10000) * 1 return result -cdef object parse_slashed_date(object date_string, bint dayfirst, - object tzinfo): +cdef inline object parse_delimited_date(object date_string, bint dayfirst, + object tzinfo): cdef: const char* buf Py_ssize_t length - int day, month, year + int day = 1, month = 1, year + assert isinstance(date_string, (str, unicode)) buf = get_c_string_buf_and_size(date_string, &length) if length == 10: if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): @@ -88,7 +89,6 @@ cdef object parse_slashed_date(object date_string, bint dayfirst, if _is_not_delimiter(buf[2]): return None month = _parse_2digit(buf) - day = 1 year = _parse_4digit(buf + 3) else: return None @@ -131,7 +131,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=yearfirst, **kwargs) return dt - dt = parse_slashed_date(date_string, dayfirst, _DEFAULT_TZINFO) + dt = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) if dt is not None: return dt From 3538566cbd1eed778290f9ea11d900ca7468d23c Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Fri, 29 Mar 2019 07:53:35 -0500 Subject: [PATCH 09/32] Speed up parse_datetime_string_with_reso --- pandas/_libs/tslibs/parsing.pyx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 68a9f6f433386..f60480a0e02b4 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -81,28 +81,30 @@ cdef inline object parse_delimited_date(object date_string, bint dayfirst, buf = get_c_string_buf_and_size(date_string, &length) if length == 10: if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): - return None + return None, None month = _parse_2digit(buf) day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) + reso = 'day' elif length == 7: if _is_not_delimiter(buf[2]): - return None + return None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) + reso = 'month' else: - return None + return None, None if month < 0 or day < 0 or year < 0: # some part is not an integer, so it's not a mm/dd/yyyy date - return None + return None, None if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): if month > MAX_MONTH or (day < MAX_MONTH and dayfirst): day, month = month, day return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, - 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType) + 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType), reso raise DateParseError("Invalid date specified (%d/%d)" % (month, day)) @@ -131,7 +133,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=yearfirst, **kwargs) return dt - dt = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) + dt, _ = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) if dt is not None: return dt @@ -215,6 +217,10 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') + parsed, reso = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) + if parsed is not None: + return parsed, parsed, reso + try: return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) except DateParseError: From 4d4df11553cc7dafd1a34282702806e425e31d81 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Mar 2019 19:44:35 +0300 Subject: [PATCH 10/32] fix code style --- asv_bench/benchmarks/io/csv.py | 2 +- pandas/_libs/tslibs/parsing.pyx | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 5c727bc001e14..260c1c3905628 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -255,7 +255,7 @@ class ReadCSVParseSpecialDate(StringIORewind): params = (['mY', 'mdY'],) params_name = ['value'] objects = { - 'mY': '01-2019\n10-2019\n02/2000\n', + 'mY': '01-2019\n10-2019\n02/2000\n', 'mdY': '12/02/2010\n' } diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f60480a0e02b4..789e6ae8d827c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -103,12 +103,11 @@ cdef inline object parse_delimited_date(object date_string, bint dayfirst, and (month <= MAX_MONTH or day <= MAX_MONTH): if month > MAX_MONTH or (day < MAX_MONTH and dayfirst): day, month = month, day - return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, - 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType), reso - - raise DateParseError("Invalid date specified (%d/%d)" % - (month, day)) + return PyDateTimeAPI.DateTime_FromDateAndTime( + year, month, day, 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType + ), reso + raise DateParseError("Invalid date specified (%d/%d)" % (month, day)) def parse_datetime_string(date_string, freq=None, dayfirst=False, From 504de84b66bed89f7644b226d0bf77d016f5189c Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Mon, 1 Apr 2019 14:28:40 +0300 Subject: [PATCH 11/32] Move to datetime_new, add docstring to _parse_delimited_date --- pandas/_libs/tslibs/parsing.pyx | 38 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 789e6ae8d827c..b53d11faa355b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -7,9 +7,9 @@ import time from io import StringIO from libc.string cimport strchr -from cpython.datetime cimport datetime, PyDateTime_IMPORT, PyDateTimeAPI -PyDateTime_IMPORT +from cpython.datetime cimport datetime, datetime_new, import_datetime +import_datetime() import numpy as np @@ -41,7 +41,6 @@ class DateParseError(ValueError): _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) -_DEFAULT_TZINFO = _DEFAULT_DATETIME.tzinfo cdef: object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') @@ -49,19 +48,22 @@ cdef: set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} # ---------------------------------------------------------------------- -DEF delimiters = b' /-\\' -DEF MAX_DAYS_IN_MONTH = 31 -DEF MAX_MONTH = 12 +cdef: + const char* delimiters = " /-\\." + int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 + cdef bint _is_not_delimiter(const char ch): return strchr(delimiters, ch) == NULL + cdef inline int _parse_2digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 10 result += getdigit_ascii(s[1], -100) * 1 return result + cdef inline int _parse_4digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 1000 @@ -70,14 +72,22 @@ cdef inline int _parse_4digit(const char* s): result += getdigit_ascii(s[3], -10000) * 1 return result -cdef inline object parse_delimited_date(object date_string, bint dayfirst, - object tzinfo): + +cdef inline object _parse_delimited_date(object date_string, bint dayfirst): + """ + Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY + Delimiter can be a space or one of ./\- + + Returns one of: + --------------- + * datetime and resolution + * None, None if passed in not a handled date pattern + """ cdef: const char* buf Py_ssize_t length int day = 1, month = 1, year - assert isinstance(date_string, (str, unicode)) buf = get_c_string_buf_and_size(date_string, &length) if length == 10: if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): @@ -103,11 +113,9 @@ cdef inline object parse_delimited_date(object date_string, bint dayfirst, and (month <= MAX_MONTH or day <= MAX_MONTH): if month > MAX_MONTH or (day < MAX_MONTH and dayfirst): day, month = month, day - return PyDateTimeAPI.DateTime_FromDateAndTime( - year, month, day, 0, 0, 0, 0, tzinfo, PyDateTimeAPI.DateTimeType - ), reso + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso - raise DateParseError("Invalid date specified (%d/%d)" % (month, day)) + raise DateParseError("Invalid date specified ({}/{})".format(month, day)) def parse_datetime_string(date_string, freq=None, dayfirst=False, @@ -132,7 +140,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=yearfirst, **kwargs) return dt - dt, _ = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) + dt, _ = _parse_delimited_date(date_string, dayfirst) if dt is not None: return dt @@ -216,7 +224,7 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') - parsed, reso = parse_delimited_date(date_string, dayfirst, _DEFAULT_TZINFO) + parsed, reso = _parse_delimited_date(date_string, dayfirst) if parsed is not None: return parsed, parsed, reso From 0613e66aef0bef39b38b8cc1e871b79d1f049e48 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Mon, 1 Apr 2019 14:01:44 -0500 Subject: [PATCH 12/32] Add whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d6d572bcb9889..b29c20a725707 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -250,6 +250,7 @@ Performance Improvements - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :meth:DataFrame.`to_csv` when write datetime dtype data (:issue:`25708`) +- Improved performance of :meth:`read_csv` by much faster parsing of MM/YYYY and DD/MM/YYYY datetime formats (:issue:`25922`) .. _whatsnew_0250.bug_fixes: From b985e3785c1dda877ae4fad955c5cd464561e137 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 2 Apr 2019 15:49:13 +0300 Subject: [PATCH 13/32] fix parsing MM/YYYY for MM > 12 --- pandas/_libs/tslibs/parsing.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index b53d11faa355b..34cad15f7b475 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -87,6 +87,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): const char* buf Py_ssize_t length int day = 1, month = 1, year + bint can_swap = 0 buf = get_c_string_buf_and_size(date_string, &length) if length == 10: @@ -96,6 +97,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) reso = 'day' + can_swap = 1 elif length == 7: if _is_not_delimiter(buf[2]): return None, None @@ -111,7 +113,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): - if month > MAX_MONTH or (day < MAX_MONTH and dayfirst): + if (month > MAX_MONTH or (day < MAX_MONTH and dayfirst)) and can_swap: day, month = month, day return datetime_new(year, month, day, 0, 0, 0, 0, None), reso From f2843e111b0ea5d910bd1759a3e9878afb0799c2 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 2 Apr 2019 15:50:09 +0300 Subject: [PATCH 14/32] added tests for parse_delimited_date --- pandas/tests/io/parser/test_parse_dates.py | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1da0b60fc733a..5ea8c255fd7b9 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -849,3 +849,51 @@ def test_parse_timezone(all_parsers): expected = DataFrame(expected_data) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("datestring", [ + "32/32/2019", + "02/30/2019", + "13/13/2019", + "13/2019", + "a3/11/2018", + "10/11/2o17" + ]) +def test_invalid_parse_delimited_date(all_parsers, datestring): + parser = all_parsers + expected = DataFrame({0: [datestring]}, dtype="object") + result = parser.read_csv(StringIO(datestring), + header=None, parse_dates=[0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("date_format", [ + "%m %d %Y", + "%m %Y" + ]) +def test_parse_delimited_date(all_parsers, date_format): + parser = all_parsers + delims = ' -.\\/' + date = datetime(2019, 4, 1) + data = '\n'.join(date.strftime(date_format.replace(' ', delim)) + for delim in delims) + expected = DataFrame({0: [date] * len(delims)}, dtype="datetime64[ns]") + result = parser.read_csv(StringIO(data), header=None, parse_dates=[0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("datestring,dayfirst,expected", [ + # DD/MM/YYYY; month > 12 thus replacement + ("13/02/2019", False, datetime(2019, 2, 13)), + ("13/02/2019", True, datetime(2019, 2, 13)), + ("02/13/2019", False, datetime(2019, 2, 13)), + ("02/13/2019", True, datetime(2019, 2, 13)), + # DD/MM/YYYY; dayfirst==True thus replacement + ("04/02/2019", True, datetime(2019, 2, 4)) + ]) +def test_parse_delimited_date_swap(all_parsers, datestring, dayfirst, expected): + parser = all_parsers + expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + result = parser.read_csv(StringIO(datestring), header=None, + dayfirst=dayfirst, parse_dates=[0]) + tm.assert_frame_equal(result, expected) \ No newline at end of file From 4f6600415275605c39b826382d885585b99fb82c Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 2 Apr 2019 21:54:26 +0300 Subject: [PATCH 15/32] fix flake8 bugs in test_parse_dates.py --- pandas/tests/io/parser/test_parse_dates.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 5ea8c255fd7b9..2be14de9edbc4 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -858,7 +858,7 @@ def test_parse_timezone(all_parsers): "13/2019", "a3/11/2018", "10/11/2o17" - ]) +]) def test_invalid_parse_delimited_date(all_parsers, datestring): parser = all_parsers expected = DataFrame({0: [datestring]}, dtype="object") @@ -870,7 +870,7 @@ def test_invalid_parse_delimited_date(all_parsers, datestring): @pytest.mark.parametrize("date_format", [ "%m %d %Y", "%m %Y" - ]) +]) def test_parse_delimited_date(all_parsers, date_format): parser = all_parsers delims = ' -.\\/' @@ -890,10 +890,11 @@ def test_parse_delimited_date(all_parsers, date_format): ("02/13/2019", True, datetime(2019, 2, 13)), # DD/MM/YYYY; dayfirst==True thus replacement ("04/02/2019", True, datetime(2019, 2, 4)) - ]) -def test_parse_delimited_date_swap(all_parsers, datestring, dayfirst, expected): +]) +def test_parse_delimited_date_swap(all_parsers, datestring, + dayfirst, expected): parser = all_parsers expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") result = parser.read_csv(StringIO(datestring), header=None, dayfirst=dayfirst, parse_dates=[0]) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From ac6e3483cd56677c9348b039ffc8c2bdf7dcda96 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Wed, 3 Apr 2019 15:29:13 +0300 Subject: [PATCH 16/32] Fix date parsing for Python <= 3.6.0 --- pandas/_libs/tslibs/parsing.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 34cad15f7b475..ad8916f6584d5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -9,6 +9,7 @@ from io import StringIO from libc.string cimport strchr from cpython.datetime cimport datetime, datetime_new, import_datetime +from cpython.version cimport PY_VERSION_HEX import_datetime() import numpy as np @@ -115,7 +116,11 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): and (month <= MAX_MONTH or day <= MAX_MONTH): if (month > MAX_MONTH or (day < MAX_MONTH and dayfirst)) and can_swap: day, month = month, day - return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + if PY_VERSION_HEX >= 0x03060100: + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError("Invalid date specified ({}/{})".format(month, day)) From 5384ebe12f7f81c99f5cb549f8a26071b708e504 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 3 Apr 2019 23:21:04 +0300 Subject: [PATCH 17/32] removed parsing MM.YYYY format, because, for example, 10.2019 interpreted as a float number, on the other hand 09.2019 can be parsed as date --- pandas/_libs/tslibs/parsing.pyx | 5 +++-- pandas/tests/io/parser/test_parse_dates.py | 13 ++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ad8916f6584d5..505e4d7a25081 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -77,7 +77,8 @@ cdef inline int _parse_4digit(const char* s): cdef inline object _parse_delimited_date(object date_string, bint dayfirst): """ Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY - Delimiter can be a space or one of ./\- + For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of ./\- + For MM/YYYY: delimiter can be a space or one of /\- Returns one of: --------------- @@ -100,7 +101,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): reso = 'day' can_swap = 1 elif length == 7: - if _is_not_delimiter(buf[2]): + if _is_not_delimiter(buf[2]) or buf[2] == b'.': return None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2be14de9edbc4..6fda1b5d39345 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -867,17 +867,16 @@ def test_invalid_parse_delimited_date(all_parsers, datestring): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("date_format", [ - "%m %d %Y", - "%m %Y" +@pytest.mark.parametrize("date_format, delimiters", [ + ("%m %d %Y", " -.\\/"), + ("%m %Y", " -\\/") ]) -def test_parse_delimited_date(all_parsers, date_format): +def test_parse_delimited_date(all_parsers, date_format, delimiters): parser = all_parsers - delims = ' -.\\/' date = datetime(2019, 4, 1) data = '\n'.join(date.strftime(date_format.replace(' ', delim)) - for delim in delims) - expected = DataFrame({0: [date] * len(delims)}, dtype="datetime64[ns]") + for delim in delimiters) + expected = DataFrame({0: [date] * len(delimiters)}, dtype="datetime64[ns]") result = parser.read_csv(StringIO(data), header=None, parse_dates=[0]) tm.assert_frame_equal(result, expected) From 889ef7a6d4700d0d4e0d780fe817220a3b37695e Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Thu, 4 Apr 2019 12:22:36 +0300 Subject: [PATCH 18/32] Remove whatsnew entry for the change It turned out that there is no change in behaviour --- pandas/_libs/tslibs/parsing.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 505e4d7a25081..6867158030188 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -101,7 +101,9 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): reso = 'day' can_swap = 1 elif length == 7: - if _is_not_delimiter(buf[2]) or buf[2] == b'.': + if buf[2] == b'.' or _is_not_delimiter(buf[2]): + # we cannot reliably tell whether e.g. 10.2010 is a float + # or a date, thus we refuse to parse it here return None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) From a6926e7ee10681bf6b5199d1e73097f000e1ea65 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Thu, 4 Apr 2019 12:27:07 +0300 Subject: [PATCH 19/32] Remove duplicate parsing of MM-YYYY in _parse_dateabbr_string This pattern is already handled by _parse_delimited_date --- pandas/_libs/tslibs/parsing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 6867158030188..beffd751bbf82 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -371,7 +371,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, except ValueError: pass - for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']: + for pat in ['%Y-%m', '%b %Y', '%b-%Y']: try: ret = datetime.strptime(date_string, pat) return ret, ret, 'month' From b7cd6b127e93c3f27455df467f56f478f958b03f Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 5 Apr 2019 12:45:56 +0300 Subject: [PATCH 20/32] added some comments in _parse_delimited_date --- pandas/_libs/tslibs/parsing.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index beffd751bbf82..e33bedab8134f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -93,6 +93,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): buf = get_c_string_buf_and_size(date_string, &length) if length == 10: + # parsing MM?DD?YYYY and DD?MM?YYYY dates if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): return None, None month = _parse_2digit(buf) @@ -101,6 +102,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): reso = 'day' can_swap = 1 elif length == 7: + # parsing MM?YYYY dates if buf[2] == b'.' or _is_not_delimiter(buf[2]): # we cannot reliably tell whether e.g. 10.2010 is a float # or a date, thus we refuse to parse it here @@ -112,7 +114,8 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): return None, None if month < 0 or day < 0 or year < 0: - # some part is not an integer, so it's not a mm/dd/yyyy date + # some part is not an integer, so + # date_string can't be converted to date, above format return None, None if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ From 4a2929d2de61e9c6957cacc4358624e9cfcdc071 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 5 Apr 2019 13:02:15 +0300 Subject: [PATCH 21/32] fix docstring in _parse_delimited_date --- pandas/_libs/tslibs/parsing.pyx | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index e33bedab8134f..5a01a89702a37 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -77,13 +77,22 @@ cdef inline int _parse_4digit(const char* s): cdef inline object _parse_delimited_date(object date_string, bint dayfirst): """ Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY + + Note + ---- For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of ./\- For MM/YYYY: delimiter can be a space or one of /\- + If `date_string` can't be converted to date, then function returns + None, None + + Parameters + ---------- + date_string : str + dayfirst : bint - Returns one of: - --------------- - * datetime and resolution - * None, None if passed in not a handled date pattern + Returns: + -------- + datetime, resolution """ cdef: const char* buf From 4bc1821c6a36ee964e31eaa7f2db11af4e1300ca Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 8 Apr 2019 21:05:28 +0300 Subject: [PATCH 22/32] fix bug when parsing 01/12/2019 with dayfirst==True --- pandas/_libs/tslibs/parsing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 5a01a89702a37..f886a7ddfb202 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -129,7 +129,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): - if (month > MAX_MONTH or (day < MAX_MONTH and dayfirst)) and can_swap: + if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: day, month = month, day if PY_VERSION_HEX >= 0x03060100: # In Python <= 3.6.0 there is no range checking for invalid dates From a43fa7bbf6c5d8c640aa2b0c8440979bba5fe0b6 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 8 Apr 2019 21:21:34 +0300 Subject: [PATCH 23/32] first attemp to use hypothesis in tests --- pandas/tests/io/parser/test_parse_dates.py | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 6fda1b5d39345..c4cfbd4661930 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -6,6 +6,10 @@ """ from datetime import date, datetime +from dateutil.parser import parse as du_parse + +from hypothesis import given, strategies as st + from io import StringIO from dateutil.parser import parse @@ -15,7 +19,8 @@ from pandas._libs.tslib import Timestamp from pandas._libs.tslibs import parsing -from pandas.compat import lrange +from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas.compat import lrange, parse_date from pandas.compat.numpy import np_array_datetime64_compat import pandas as pd @@ -897,3 +902,30 @@ def test_parse_delimited_date_swap(all_parsers, datestring, result = parser.read_csv(StringIO(datestring), header=None, dayfirst=dayfirst, parse_dates=[0]) tm.assert_frame_equal(result, expected) + + +gen_random_datetime = st.dates( + min_value=date(1000, 1, 1), + max_value=date(9999, 12, 31) +) +_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, + second=0, microsecond=0) + + +@given(gen_random_datetime) +@pytest.mark.parametrize("date_format, delimiters, dayfirst", [ + ("%m %d %Y", " -./", False), + ("%m %d %Y", " -./", True), + ("%d %m %Y", " -./", True), + ("%d %m %Y", " -./", False), + ("%m %Y", " -/", False), + ("%m %Y", " -/", True) +]) +def test_hypothesis_delimited_date(date_format, delimiters, dayfirst, date): + date_strings = [date.strftime(date_format.replace(' ', delim)) + for delim in delimiters] + for date_string in date_strings: + result = parse_datetime_string(date_string, dayfirst=dayfirst) + expected = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=False) + assert result == expected From 710a287e16f41b62e70400e0f7ba7adcf0dbb096 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 8 Apr 2019 22:29:39 +0300 Subject: [PATCH 24/32] apply isort on pandas/tests/io/parser/test_parse_dates.py --- pandas/tests/io/parser/test_parse_dates.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index c4cfbd4661930..112466e3d614d 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -6,12 +6,10 @@ """ from datetime import date, datetime -from dateutil.parser import parse as du_parse - -from hypothesis import given, strategies as st - from io import StringIO +from dateutil.parser import parse as du_parse +from hypothesis import given, strategies as st from dateutil.parser import parse import numpy as np import pytest From 859e312edac4462684abe04a6d9f0e270698a49f Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 00:10:24 +0300 Subject: [PATCH 25/32] added new '%Y %m %d' format and 2 @pytest.mark.parametrize for test_hypothesis_delimited_date --- pandas/tests/io/parser/test_parse_dates.py | 35 +++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 112466e3d614d..3fb069d0276d3 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -906,24 +906,25 @@ def test_parse_delimited_date_swap(all_parsers, datestring, min_value=date(1000, 1, 1), max_value=date(9999, 12, 31) ) -_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, - second=0, microsecond=0) +_DEFAULT_DATETIME = datetime(1, 1, 1) @given(gen_random_datetime) -@pytest.mark.parametrize("date_format, delimiters, dayfirst", [ - ("%m %d %Y", " -./", False), - ("%m %d %Y", " -./", True), - ("%d %m %Y", " -./", True), - ("%d %m %Y", " -./", False), - ("%m %Y", " -/", False), - ("%m %Y", " -/", True) +@pytest.mark.parametrize("delimiter", list(" -./")) +@pytest.mark.parametrize("dayfirst", [True, False]) +@pytest.mark.parametrize("date_format", [ + "%m %d %Y", + "%d %m %Y", + "%m %Y", + "%Y %m %d" ]) -def test_hypothesis_delimited_date(date_format, delimiters, dayfirst, date): - date_strings = [date.strftime(date_format.replace(' ', delim)) - for delim in delimiters] - for date_string in date_strings: - result = parse_datetime_string(date_string, dayfirst=dayfirst) - expected = du_parse(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=False) - assert result == expected +def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date): + if date_format == "%m %Y" and delimiter == ".": + # parse_datetime_string cannot reliably tell whether e.g. %m.%Y + # is a float or a date, thus we skip it + pytest.skip() + date_string = date.strftime(date_format.replace(' ', delimiter)) + result = parse_datetime_string(date_string, dayfirst=dayfirst) + expected = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=False) + assert result == expected From b41ea63fea905af52492b6c5b40502174a6cbf42 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 13:19:38 +0300 Subject: [PATCH 26/32] removed test_parse_delimited_date; added next formats: '%y %m %d', '%Y%m%d', '%y%m%d' for test_hypothesis_delimited_date; created _helper_hypothesis_delimited_date func --- pandas/tests/io/parser/test_parse_dates.py | 86 ++++++++++++---------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 3fb069d0276d3..0c57ec81beca6 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -29,6 +29,14 @@ import pandas.io.date_converters as conv import pandas.io.parsers as parsers +# constant +_DEFAULT_DATETIME = datetime(1, 1, 1) +# Strategy for hypothesis +gen_random_datetime = st.dates( + min_value=date(1900, 1, 1), # on Windows for %y need: year > 1900 + max_value=date(9999, 12, 31) +) + def test_separator_date_conflict(all_parsers): # Regression test for gh-4678 @@ -854,7 +862,7 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("datestring", [ +@pytest.mark.parametrize("date_string", [ "32/32/2019", "02/30/2019", "13/13/2019", @@ -862,69 +870,71 @@ def test_parse_timezone(all_parsers): "a3/11/2018", "10/11/2o17" ]) -def test_invalid_parse_delimited_date(all_parsers, datestring): +def test_invalid_parse_delimited_date(all_parsers, date_string): parser = all_parsers - expected = DataFrame({0: [datestring]}, dtype="object") - result = parser.read_csv(StringIO(datestring), + expected = DataFrame({0: [date_string]}, dtype="object") + result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0]) tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("date_format, delimiters", [ - ("%m %d %Y", " -.\\/"), - ("%m %Y", " -\\/") -]) -def test_parse_delimited_date(all_parsers, date_format, delimiters): - parser = all_parsers - date = datetime(2019, 4, 1) - data = '\n'.join(date.strftime(date_format.replace(' ', delim)) - for delim in delimiters) - expected = DataFrame({0: [date] * len(delimiters)}, dtype="datetime64[ns]") - result = parser.read_csv(StringIO(data), header=None, parse_dates=[0]) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("datestring,dayfirst,expected", [ - # DD/MM/YYYY; month > 12 thus replacement - ("13/02/2019", False, datetime(2019, 2, 13)), - ("13/02/2019", True, datetime(2019, 2, 13)), - ("02/13/2019", False, datetime(2019, 2, 13)), - ("02/13/2019", True, datetime(2019, 2, 13)), - # DD/MM/YYYY; dayfirst==True thus replacement - ("04/02/2019", True, datetime(2019, 2, 4)) +@pytest.mark.parametrize("date_string,dayfirst,expected", [ + # %d/%m/%Y; month > 12 thus replacement + ("13\\02\\2019", False, datetime(2019, 2, 13)), + ("13\\02\\2019", True, datetime(2019, 2, 13)), + # %m/%d/%Y; day > 12 thus there will be no replacement + ("02\\13\\2019", False, datetime(2019, 2, 13)), + ("02\\13\\2019", True, datetime(2019, 2, 13)), + # %d/%m/%Y; dayfirst==True thus replacement + ("04\\02\\2019", True, datetime(2019, 2, 4)) ]) -def test_parse_delimited_date_swap(all_parsers, datestring, +def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): parser = all_parsers expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") - result = parser.read_csv(StringIO(datestring), header=None, + result = parser.read_csv(StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0]) tm.assert_frame_equal(result, expected) -gen_random_datetime = st.dates( - min_value=date(1000, 1, 1), - max_value=date(9999, 12, 31) -) -_DEFAULT_DATETIME = datetime(1, 1, 1) +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): + msg, result = None, None + try: + result = call(date_string, **kwargs) + except ValueError as er: + msg = str(er) + pass + return msg, result @given(gen_random_datetime) @pytest.mark.parametrize("delimiter", list(" -./")) @pytest.mark.parametrize("dayfirst", [True, False]) @pytest.mark.parametrize("date_format", [ - "%m %d %Y", "%d %m %Y", + "%m %d %Y", "%m %Y", - "%Y %m %d" + "%Y %m %d", + "%y %m %d", + "%Y%m%d", + "%y%m%d", ]) def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date): if date_format == "%m %Y" and delimiter == ".": # parse_datetime_string cannot reliably tell whether e.g. %m.%Y # is a float or a date, thus we skip it pytest.skip() + result, expected = None, None + except_in_dateutil, except_out_dateutil = None, None date_string = date.strftime(date_format.replace(' ', delimiter)) - result = parse_datetime_string(date_string, dayfirst=dayfirst) - expected = du_parse(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=False) + + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, + dayfirst=dayfirst) + except_in_dateutil, expected = _helper_hypothesis_delimited_date( + du_parse, date_string, + default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=False) + + assert except_out_dateutil == except_in_dateutil assert result == expected From 6fad4f4e0f2abc3cb1156628b15e0cda345101d7 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 15:21:18 +0300 Subject: [PATCH 27/32] added message for pytest.skip(); more complete docstring in _parse_delimited_date now --- pandas/_libs/tslibs/parsing.pyx | 10 +++++++--- pandas/tests/io/parser/test_parse_dates.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f886a7ddfb202..034a520b3905b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -76,12 +76,16 @@ cdef inline int _parse_4digit(const char* s): cdef inline object _parse_delimited_date(object date_string, bint dayfirst): """ - Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY + Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. + At the beginning function tries to parse date in MM/DD/YYYY format, but + if month > 12 - in DD/MM/YYYY (`dayfirst == False`). + With `dayfirst == True` function makes an attempt to parse date in + DD/MM/YYYY, if an attemp is wrong - in DD/MM/YYYY Note ---- - For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of ./\- - For MM/YYYY: delimiter can be a space or one of /\- + For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of ./-\\ + For MM/YYYY: delimiter can be a space or one of /-\\ If `date_string` can't be converted to date, then function returns None, None diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 0c57ec81beca6..2e400fa991e5c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -31,6 +31,7 @@ # constant _DEFAULT_DATETIME = datetime(1, 1, 1) + # Strategy for hypothesis gen_random_datetime = st.dates( min_value=date(1900, 1, 1), # on Windows for %y need: year > 1900 @@ -921,9 +922,8 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): ]) def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date): if date_format == "%m %Y" and delimiter == ".": - # parse_datetime_string cannot reliably tell whether e.g. %m.%Y - # is a float or a date, thus we skip it - pytest.skip() + pytest.skip("parse_datetime_string cannot reliably tell whether \ + e.g. %m.%Y is a float or a date, thus we skip it") result, expected = None, None except_in_dateutil, except_out_dateutil = None, None date_string = date.strftime(date_format.replace(' ', delimiter)) From 7113c75757bd640cdd5355f3f16e0f1764282200 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 15:55:55 +0300 Subject: [PATCH 28/32] removed \ delimiter --- pandas/_libs/tslibs/parsing.pyx | 6 +++--- pandas/tests/io/parser/test_parse_dates.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 034a520b3905b..de8872c6e9b8c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -50,7 +50,7 @@ cdef: # ---------------------------------------------------------------------- cdef: - const char* delimiters = " /-\\." + const char* delimiters = " /-." int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 @@ -84,8 +84,8 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): Note ---- - For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of ./-\\ - For MM/YYYY: delimiter can be a space or one of /-\\ + For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-. + For MM/YYYY: delimiter can be a space or one of /- If `date_string` can't be converted to date, then function returns None, None diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2e400fa991e5c..0f3427eeb9062 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -881,13 +881,13 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): @pytest.mark.parametrize("date_string,dayfirst,expected", [ # %d/%m/%Y; month > 12 thus replacement - ("13\\02\\2019", False, datetime(2019, 2, 13)), - ("13\\02\\2019", True, datetime(2019, 2, 13)), + ("13/02/2019", False, datetime(2019, 2, 13)), + ("13/02/2019", True, datetime(2019, 2, 13)), # %m/%d/%Y; day > 12 thus there will be no replacement - ("02\\13\\2019", False, datetime(2019, 2, 13)), - ("02\\13\\2019", True, datetime(2019, 2, 13)), + ("02/13/2019", False, datetime(2019, 2, 13)), + ("02/13/2019", True, datetime(2019, 2, 13)), # %d/%m/%Y; dayfirst==True thus replacement - ("04\\02\\2019", True, datetime(2019, 2, 4)) + ("04/02/2019", True, datetime(2019, 2, 4)) ]) def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): From d0bfd9161bbf20caa630d12ead88638bbe40ceaa Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 21:18:40 +0300 Subject: [PATCH 29/32] using is_platform_windows() in date_strategy definition; changed date -> test_datetime --- pandas/tests/io/parser/test_parse_dates.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 0f3427eeb9062..341c525884579 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -18,7 +18,7 @@ from pandas._libs.tslib import Timestamp from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import parse_datetime_string -from pandas.compat import lrange, parse_date +from pandas.compat import lrange, parse_date, is_platform_windows from pandas.compat.numpy import np_array_datetime64_compat import pandas as pd @@ -33,10 +33,10 @@ _DEFAULT_DATETIME = datetime(1, 1, 1) # Strategy for hypothesis -gen_random_datetime = st.dates( - min_value=date(1900, 1, 1), # on Windows for %y need: year > 1900 - max_value=date(9999, 12, 31) -) +if is_platform_windows(): + date_strategy = st.datetimes(min_value=datetime(1900, 1, 1)) +else: + date_strategy = st.datetimes() def test_separator_date_conflict(all_parsers): @@ -908,7 +908,7 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): return msg, result -@given(gen_random_datetime) +@given(date_strategy) @pytest.mark.parametrize("delimiter", list(" -./")) @pytest.mark.parametrize("dayfirst", [True, False]) @pytest.mark.parametrize("date_format", [ @@ -920,13 +920,14 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): "%Y%m%d", "%y%m%d", ]) -def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date): +def test_hypothesis_delimited_date(date_format, dayfirst, + delimiter, test_datetime): if date_format == "%m %Y" and delimiter == ".": pytest.skip("parse_datetime_string cannot reliably tell whether \ e.g. %m.%Y is a float or a date, thus we skip it") result, expected = None, None except_in_dateutil, except_out_dateutil = None, None - date_string = date.strftime(date_format.replace(' ', delimiter)) + date_string = test_datetime.strftime(date_format.replace(' ', delimiter)) except_out_dateutil, result = _helper_hypothesis_delimited_date( parse_datetime_string, date_string, From da845edf38da8ab6f3af3ac76c9b344869e4a7a0 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 9 Apr 2019 22:35:26 +0300 Subject: [PATCH 30/32] fixed import order; using @settings(deadline=None) now; dates with year < 1000 not processig in _parse_delimited_date now --- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/tests/io/parser/test_parse_dates.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index de8872c6e9b8c..bcbff355c96dd 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -126,7 +126,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): else: return None, None - if month < 0 or day < 0 or year < 0: + if month < 0 or day < 0 or year < 1000: # some part is not an integer, so # date_string can't be converted to date, above format return None, None diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 341c525884579..dbc0434e65fd7 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -9,7 +9,7 @@ from io import StringIO from dateutil.parser import parse as du_parse -from hypothesis import given, strategies as st +from hypothesis import given, settings, strategies as st from dateutil.parser import parse import numpy as np import pytest @@ -18,7 +18,7 @@ from pandas._libs.tslib import Timestamp from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import parse_datetime_string -from pandas.compat import lrange, parse_date, is_platform_windows +from pandas.compat import is_platform_windows, lrange from pandas.compat.numpy import np_array_datetime64_compat import pandas as pd @@ -909,6 +909,7 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): @given(date_strategy) +@settings(deadline=None) @pytest.mark.parametrize("delimiter", list(" -./")) @pytest.mark.parametrize("dayfirst", [True, False]) @pytest.mark.parametrize("date_format", [ From 13717ec7ce867f9aa7ac56fb18fe025fd1a8c775 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Thu, 18 Apr 2019 19:05:11 +0300 Subject: [PATCH 31/32] removed extra 'parse' import --- pandas/tests/io/parser/test_parse_dates.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index dbc0434e65fd7..f523b2910db8e 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -10,7 +10,6 @@ from dateutil.parser import parse as du_parse from hypothesis import given, settings, strategies as st -from dateutil.parser import parse import numpy as np import pytest import pytz @@ -451,7 +450,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): """ if "dayfirst" in kwargs: df = parser.read_csv(StringIO(data), names=["time", "Q", "NTU"], - date_parser=lambda d: parse(d, **kwargs), + date_parser=lambda d: du_parse(d, **kwargs), header=0, index_col=0, parse_dates=True, na_values=["NA"]) exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1), @@ -463,7 +462,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): msg = "got an unexpected keyword argument 'day_first'" with pytest.raises(TypeError, match=msg): parser.read_csv(StringIO(data), names=["time", "Q", "NTU"], - date_parser=lambda d: parse(d, **kwargs), + date_parser=lambda d: du_parse(d, **kwargs), skiprows=[0], index_col=0, parse_dates=True, na_values=["NA"]) From 2cd971af704fe952fc221bb1d254524c2acd795a Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 19 Apr 2019 09:15:09 +0300 Subject: [PATCH 32/32] _is_not_delimiter is inline now --- pandas/_libs/tslibs/parsing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index bcbff355c96dd..4fc695d3a682c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -54,7 +54,7 @@ cdef: int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 -cdef bint _is_not_delimiter(const char ch): +cdef inline bint _is_not_delimiter(const char ch): return strchr(delimiters, ch) == NULL