From fa3fc631fc5d7bb43868e20c7339b918871b0b58 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Thu, 8 Nov 2018 21:32:14 -0600
Subject: [PATCH 1/3] PERF: Speeds up creation of Period, PeriodArray, with Offset freq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

master:

```python
In [2]: freq = pd.tseries.offsets.Day()
   ...:
   ...: %timeit pd.Period("2001", freq=freq)
294 µs ± 5.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [3]: %timeit pd.Period._maybe_convert_freq(freq)
   ...:
64.7 µs ± 382 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
```

branch:

```python
In [2]: freq = pd.tseries.offsets.Day()
   ...:
   ...: %timeit pd.Period("2001", freq=freq)
158 µs ± 2.87 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

In [3]: %timeit pd.Period._maybe_convert_freq(freq)
193 ns ± 4.3 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
```

While looking at the profile plot in snakeviz, it seemed like a lot of the time in `Period._maybe_convert_freq` was spent importing modules. `_maybe_convert_freq` calls `offsets.to_offset`, which imports the Python-level `to_offset` function inside its body each time it is called. Does Cython not handle this well?
--- pandas/_libs/tslibs/period.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ebcbea0ee30b3..cce65bbd6a218 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -49,7 +49,7 @@ from resolution import Resolution from nattype import nat_strings, NaT from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike from offsets cimport to_offset -from offsets import _Tick +from offsets import _Tick, _BaseOffset cdef bint PY2 = str == bytes cdef enum: @@ -1572,7 +1572,8 @@ cdef class _Period(object): code, stride = get_freq_code(freq) freq = get_freq_str(code, stride) - freq = to_offset(freq) + if not isinstance(freq, _BaseOffset): + freq = to_offset(freq) if freq.n <= 0: raise ValueError('Frequency must be positive, because it' From 8daba2cfc776ba7919208e09737653f2942cbf35 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Nov 2018 06:27:57 -0600 Subject: [PATCH 2/3] move to to_offset --- pandas/_libs/tslibs/offsets.pyx | 2 ++ pandas/_libs/tslibs/period.pyx | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8f5887754e40d..b80d21d625bcf 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -84,6 +84,8 @@ cdef to_offset(object obj): Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime imports """ + if isinstance(obj, _BaseOffset): + return obj from pandas.tseries.frequencies import to_offset return to_offset(obj) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index cce65bbd6a218..13148737049d0 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -49,7 +49,7 @@ from resolution import Resolution from nattype import nat_strings, NaT from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike from offsets cimport to_offset -from 
offsets import _Tick, _BaseOffset +from offsets import _Tick cdef bint PY2 = str == bytes cdef enum: @@ -1572,9 +1572,6 @@ cdef class _Period(object): code, stride = get_freq_code(freq) freq = get_freq_str(code, stride) - if not isinstance(freq, _BaseOffset): - freq = to_offset(freq) - if freq.n <= 0: raise ValueError('Frequency must be positive, because it' ' represents span: {freqstr}' From 795a7d1de36c54a98d0835a8c53afebbbe983a23 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Nov 2018 06:35:56 -0600 Subject: [PATCH 3/3] fixup --- pandas/_libs/tslibs/period.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 13148737049d0..a284d8fb544e7 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1567,11 +1567,12 @@ cdef class _Period(object): @classmethod def _maybe_convert_freq(cls, object freq): - if isinstance(freq, (int, tuple)): code, stride = get_freq_code(freq) freq = get_freq_str(code, stride) + freq = to_offset(freq) + if freq.n <= 0: raise ValueError('Frequency must be positive, because it' ' represents span: {freqstr}'