From eae91012690861df3c2f6f772bd1379f6ef7c4b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Apr 2021 20:50:37 -0700 Subject: [PATCH 1/6] CLN: tighten cython declarations --- pandas/_libs/index_class_helper.pxi.in | 4 ++-- pandas/_libs/sparse.pyx | 2 ++ pandas/_libs/sparse_op_helper.pxi.in | 4 ++-- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 4 ++-- pandas/_libs/tslibs/fields.pyx | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 8638c2c689c3f..f0351e06f2b8c 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -44,8 +44,8 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} - cdef void _call_map_locations(self, ndarray values): - self.mapping.map_locations(algos.ensure_{{name.lower()}}(values)) + cdef void _call_map_locations(self, ndarray[{{dtype}}_t] values): + self.mapping.map_locations(values) cdef _maybe_get_bool_indexer(self, object val): # Returns ndarray[bool] or int diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 0c3d8915b749b..134883e159407 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -618,6 +618,7 @@ cdef class BlockIndex(SparseIndex): pass +@cython.internal cdef class BlockMerge: """ Object-oriented approach makes sharing state between recursive functions a @@ -661,6 +662,7 @@ cdef class BlockMerge: self.yi = xi +@cython.internal cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index ce665ca812131..36f22445d0373 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -302,8 +302,8 @@ cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, raise NotImplementedError -cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, - {{dtype}}_t yfill): +cdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): return {{(opname, 'xfill', 'yfill', dtype) | get_op}} {{endfor}} diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 1b99e855da40f..5b80193c1f27a 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -28,7 +28,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, cdef int64_t get_datetime64_nanos(object val) except? -1 -cpdef datetime localize_pydatetime(datetime dt, object tz) +cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) cdef int64_t cast_from_unit(object ts, str unit) except? -1 cpdef (int64_t, int) precision_from_unit(str unit) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 1bda35206ccef..865185f9acea7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -810,14 +810,14 @@ cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): return dt.replace(tzinfo=tz) -cpdef inline datetime localize_pydatetime(datetime dt, object tz): +cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. Parameters ---------- dt : datetime or Timestamp - tz : tzinfo, "UTC", or None + tz : tzinfo or None Returns ------- diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index d6ca38e57d2d8..759c31b51d780 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: @cython.wraparound(False) @cython.boundscheck(False) def get_start_end_field(const int64_t[:] dtindex, str field, - object freqstr=None, int month_kw=12): + str freqstr=None, int month_kw=12): """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year From 3eca2fb5cf1db0d66c9bff254c1e9982e1b0d5c3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Apr 2021 07:50:40 -0700 Subject: [PATCH 2/6] revert sparse edits --- pandas/_libs/sparse.pyx | 2 -- pandas/_libs/sparse_op_helper.pxi.in | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 134883e159407..0c3d8915b749b 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -618,7 +618,6 @@ cdef class BlockIndex(SparseIndex): pass -@cython.internal cdef class BlockMerge: """ Object-oriented approach makes sharing state between recursive functions a @@ -662,7 +661,6 @@ cdef class BlockMerge: self.yi = xi -@cython.internal cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 36f22445d0373..ce665ca812131 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -302,8 +302,8 @@ cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, raise NotImplementedError -cdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, - {{dtype}}_t yfill): +cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): return {{(opname, 'xfill', 'yfill', dtype) | get_op}} {{endfor}} From 16e045ec3969b5ed26803ad907816705f245094c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Apr 2021 10:14:14 -0700 Subject: [PATCH 3/6] troubleshoot --- pandas/__init__.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 7cad3eded0585..313d3184468d9 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -25,16 +25,7 @@ is_numpy_dev as _is_numpy_dev, ) -try: - from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib -except ImportError as e: # pragma: no cover - # hack but overkill to use re - module = str(e).replace("cannot import name ", "") - raise ImportError( - f"C extension: {module} not built. If you want to import " - "pandas from the source directory, you may need to run " - "'python setup.py build_ext --force' to build the C extensions first." - ) from e +from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib from pandas._config import ( get_option, From 81a58d04f6768e09989e0d6571ac717380aa2644 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Apr 2021 10:33:12 -0700 Subject: [PATCH 4/6] remove defaults from get_start_end_field --- pandas/__init__.py | 11 ++++++++++- pandas/_libs/tslibs/fields.pyi | 4 ++-- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/tests/tslibs/test_fields.py | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 313d3184468d9..7cad3eded0585 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -25,7 +25,16 @@ is_numpy_dev as _is_numpy_dev, ) -from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +try: + from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +except ImportError as e: # pragma: no cover + # hack but overkill to use re + module = str(e).replace("cannot import name ", "") + raise ImportError( + f"C extension: {module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --force' to build the C extensions first." + ) from e from pandas._config import ( get_option, diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index 22ae156d78b7d..cc6f8e95a6818 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -15,8 +15,8 @@ def get_date_name_field( def get_start_end_field( dtindex: np.ndarray, # const int64_t[:] field: str, - freqstr: str | None = ..., - month_kw: int = ... + freqstr: str | None, + month_kw: int, ) -> np.ndarray: ... # np.ndarray[bool] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 759c31b51d780..8de7e4331aff7 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: @cython.wraparound(False) @cython.boundscheck(False) def get_start_end_field(const int64_t[:] dtindex, str field, - str freqstr=None, int month_kw=12): + str freqstr, int month_kw): """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index e5fe998923f8d..b851032a8fcaa 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -19,7 +19,7 @@ def test_fields_readonly(): expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) tm.assert_numpy_array_equal(result, expected) - result = fields.get_start_end_field(dtindex, "is_month_start", None) + result = fields.get_start_end_field(dtindex, "is_month_start", None, 12) expected = np.array([True, False, False, False, False], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) From 9f14f0550cac1fd240e8d195ebeaf5b12d99c3c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Apr 2021 15:57:25 -0700 Subject: [PATCH 5/6] annotations --- pandas/_libs/index.pyx | 3 +++ pandas/_libs/intervaltree.pxi.in | 12 ++++++------ pandas/_libs/sparse.pyx | 2 ++ pandas/_libs/sparse_op_helper.pxi.in | 4 ++-- pandas/_libs/tslibs/fields.pyi | 4 ++-- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/tests/tslibs/test_fields.py | 2 +- 7 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f1f56c6c0c855..f7cec262ca302 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -74,6 +74,7 @@ cdef class IndexEngine: return val in self.mapping cpdef get_loc(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] cdef: Py_ssize_t loc @@ -109,6 +110,7 @@ cdef class IndexEngine: raise KeyError(val) cdef inline _get_loc_duplicates(self, object val): + # -> Py_ssize_t | slice | ndarray[bool] cdef: Py_ssize_t diff @@ -142,6 +144,7 @@ cdef class IndexEngine: cdef _unpack_bool_indexer(self, ndarray[uint8_t, ndim=1, cast=True] indexer, object val): + # Returns ndarray[bool] or int cdef: ndarray[intp_t, ndim=1] found int count diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 1af5b23e3393f..0fb01a2188a57 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -73,7 +73,7 @@ cdef class IntervalTree(IntervalMixin): self.root = node_cls(self.left, self.right, indices, leaf_size) @property - def left_sorter(self): + def left_sorter(self) -> np.ndarray: """How to sort the left labels; this is used for binary search """ if self._left_sorter is None: @@ -81,7 +81,7 @@ cdef class IntervalTree(IntervalMixin): return self._left_sorter @property - def right_sorter(self): + def right_sorter(self) -> np.ndarray: """How to sort the right labels """ if self._right_sorter is None: @@ -89,7 +89,7 @@ cdef class IntervalTree(IntervalMixin): return self._right_sorter @property - def is_overlapping(self): + def is_overlapping(self) -> bool: """ Determine if the IntervalTree contains overlapping intervals. Cached as self._is_overlapping. @@ -109,7 +109,7 @@ cdef class IntervalTree(IntervalMixin): return self._is_overlapping @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: """ Return True if the IntervalTree is monotonic increasing (only equal or increasing values), else False @@ -119,7 +119,7 @@ cdef class IntervalTree(IntervalMixin): sort_order = np.lexsort(values) return is_monotonic(sort_order, False)[0] - def get_indexer(self, scalar_t[:] target): + def get_indexer(self, scalar_t[:] target) -> np.ndarray: """Return the positions corresponding to unique intervals that overlap with the given array of scalar targets. """ @@ -180,7 +180,7 @@ cdef class IntervalTree(IntervalMixin): n_elements=self.root.n_elements)) # compat with IndexEngine interface - def clear_mapping(self): + def clear_mapping(self) -> None: pass diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 0c3d8915b749b..134883e159407 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -618,6 +618,7 @@ cdef class BlockIndex(SparseIndex): pass +@cython.internal cdef class BlockMerge: """ Object-oriented approach makes sharing state between recursive functions a @@ -661,6 +662,7 @@ cdef class BlockMerge: self.yi = xi +@cython.internal cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index ce665ca812131..36f22445d0373 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -302,8 +302,8 @@ cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, raise NotImplementedError -cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, - {{dtype}}_t yfill): +cdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): return {{(opname, 'xfill', 'yfill', dtype) | get_op}} {{endfor}} diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index cc6f8e95a6818..22ae156d78b7d 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -15,8 +15,8 @@ def get_date_name_field( def get_start_end_field( dtindex: np.ndarray, # const int64_t[:] field: str, - freqstr: str | None, - month_kw: int, + freqstr: str | None = ..., + month_kw: int = ... ) -> np.ndarray: ... # np.ndarray[bool] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 8de7e4331aff7..d6ca38e57d2d8 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: @cython.wraparound(False) @cython.boundscheck(False) def get_start_end_field(const int64_t[:] dtindex, str field, - str freqstr, int month_kw): + object freqstr=None, int month_kw=12): """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index b851032a8fcaa..e5fe998923f8d 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -19,7 +19,7 @@ def test_fields_readonly(): expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) tm.assert_numpy_array_equal(result, expected) - result = fields.get_start_end_field(dtindex, "is_month_start", None, 12) + result = fields.get_start_end_field(dtindex, "is_month_start", None) expected = np.array([True, False, False, False, False], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) From 5d542c29f49794dda7ccc890e39e6b1ecbd095c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Apr 2021 16:29:24 -0700 Subject: [PATCH 6/6] revert sparse edit --- pandas/_libs/sparse_op_helper.pxi.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 36f22445d0373..ce665ca812131 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -302,8 +302,8 @@ cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, raise NotImplementedError -cdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, - {{dtype}}_t yfill): +cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): return {{(opname, 'xfill', 'yfill', dtype) | get_op}} {{endfor}}