diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 5f3d946a1e024..b7f17aee35a44 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -7,13 +7,30 @@ from libc.math cimport fabs, sqrt import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, - NPY_INT64, NPY_INT32, NPY_INT16, NPY_INT8, - NPY_UINT64, NPY_UINT32, NPY_UINT16, NPY_UINT8, - NPY_FLOAT32, NPY_FLOAT64, - NPY_OBJECT, - int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t) +from numpy cimport ( + NPY_FLOAT32, + NPY_FLOAT64, + NPY_INT8, + NPY_INT16, + NPY_INT32, + NPY_INT64, + NPY_OBJECT, + NPY_UINT8, + NPY_UINT16, + NPY_UINT32, + NPY_UINT64, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) cnp.import_array() diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 093c53790cd35..dfa7aa708d681 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -3,13 +3,25 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, - int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t) +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) cnp.import_array() from pandas._libs.algos import ( - groupsort_indexer, ensure_platform_int, take_1d_int64_int64 + ensure_platform_int, + groupsort_indexer, + take_1d_int64_int64, ) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1990ef66a6bf1..7a18429f21a18 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -15,18 +15,33 @@ from cpython.iterator cimport PyIter_Check from cpython.sequence cimport PySequence_Check from cpython.number cimport PyNumber_Check -from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, - PyTime_Check, PyDelta_Check, - PyDateTime_IMPORT) +from cpython.datetime cimport ( + PyDateTime_Check, + PyDate_Check, + PyTime_Check, + PyDelta_Check, + PyDateTime_IMPORT, +) PyDateTime_IMPORT import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, PyArray_Check, PyArray_GETITEM, - PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, - flatiter, NPY_OBJECT, - int64_t, float32_t, float64_t, - uint8_t, uint64_t, complex128_t) +from numpy cimport ( + NPY_OBJECT, + PyArray_Check, + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + complex128_t, + flatiter, + float32_t, + float64_t, + int64_t, + ndarray, + uint8_t, + uint64_t, +) cnp.import_array() cdef extern from "numpy/arrayobject.h": @@ -60,7 +75,12 @@ from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare from pandas._libs.missing cimport ( - checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period, C_NA + checknull, + isnaobj, + is_null_datetime64, + is_null_timedelta64, + is_null_period, + C_NA, ) @@ -246,7 +266,7 @@ def item_from_zerodim(val: object) -> object: @cython.wraparound(False) @cython.boundscheck(False) -def fast_unique_multiple(list arrays, sort: bool=True): +def fast_unique_multiple(list arrays, sort: bool = True): """ Generate a list of unique values from a list of arrays. @@ -277,6 +297,7 @@ def fast_unique_multiple(list arrays, sort: bool=True): if val not in table: table[val] = stub uniques.append(val) + if sort is None: try: uniques.sort() @@ -289,7 +310,7 @@ def fast_unique_multiple(list arrays, sort: bool=True): @cython.wraparound(False) @cython.boundscheck(False) -def fast_unique_multiple_list(lists: list, sort: bool=True) -> list: +def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: cdef: list buf Py_ssize_t k = len(lists) diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 4e831081c8e54..e74b5919a4590 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -1,8 +1,20 @@ import cython from cython import Py_ssize_t -from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, ndarray) +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + cimport numpy as cnp import numpy as np from pandas._libs.lib cimport c_is_list_like diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 3a6dd506b2428..4ca053a0ee83a 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -448,7 +448,7 @@ cdef class BlockIndex(SparseIndex): ylen = y.blengths # block may be split, but can't exceed original len / 2 + 1 - max_len = int(min(self.length, y.length) / 2) + 1 + max_len = min(self.length, y.length) // 2 + 1 out_bloc = np.empty(max_len, dtype=np.int32) out_blen = np.empty(max_len, dtype=np.int32) @@ -672,7 +672,7 @@ cdef class BlockUnion(BlockMerge): ystart = self.ystart yend = self.yend - max_len = int(min(self.x.length, self.y.length) / 2) + 1 + max_len = min(self.x.length, self.y.length) // 2 + 1 out_bloc = np.empty(max_len, dtype=np.int32) out_blen = np.empty(max_len, dtype=np.int32) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 9419f0eba39aa..c3a47902cff0f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1,8 +1,6 @@ from datetime import datetime -from cpython.object cimport ( - PyObject_RichCompareBool, - Py_EQ, Py_NE) +from cpython.object cimport PyObject_RichCompareBool, Py_EQ, Py_NE from numpy cimport int64_t, import_array, ndarray import numpy as np @@ -14,15 +12,25 @@ from libc.string cimport strlen, memset import cython -from cpython.datetime cimport (PyDateTime_Check, PyDelta_Check, PyDate_Check, - PyDateTime_IMPORT) +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, +) # import datetime C API PyDateTime_IMPORT from pandas._libs.tslibs.np_datetime cimport ( - npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, - pandas_datetime_to_datetimestruct, check_dts_bounds, - NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us) + npy_datetimestruct, + dtstruct_to_dt64, + dt64_to_dtstruct, + pandas_datetime_to_datetimestruct, + check_dts_bounds, + NPY_DATETIMEUNIT, + NPY_FR_D, + NPY_FR_us, +) cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, @@ -37,12 +45,15 @@ from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds cimport pandas._libs.tslibs.ccalendar as ccalendar -from pandas._libs.tslibs.ccalendar cimport ( - dayofweek, get_day_of_year, is_leapyear) +from pandas._libs.tslibs.ccalendar cimport dayofweek, get_day_of_year, is_leapyear from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS from pandas._libs.tslibs.frequencies cimport ( - get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, - get_rule_month) + get_base_alias, + get_freq_code, + get_freq_str, + get_rule_month, + get_to_timestamp_base, +) from pandas._libs.tslibs.parsing import parse_time_string from pandas._libs.tslibs.resolution import Resolution from pandas._libs.tslibs.nattype import nat_strings @@ -55,7 +66,7 @@ from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal cdef: enum: - INT32_MIN = -2147483648 + INT32_MIN = -2_147_483_648 ctypedef struct asfreq_info: @@ -179,8 +190,7 @@ cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil: return asfreq_MtoB elif from_group == FR_WK: return asfreq_WtoB - elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, - FR_MS, FR_US, FR_NS]: + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: return asfreq_DTtoB else: return nofunc @@ -289,17 +299,15 @@ cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, return DtoB_weekday(unix_date) -cdef inline int64_t upsample_daytime(int64_t ordinal, - asfreq_info *af_info) nogil: - if (af_info.is_end): +cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + if af_info.is_end: return (ordinal + 1) * af_info.intraday_conversion_factor - 1 else: return ordinal * af_info.intraday_conversion_factor -cdef inline int64_t downsample_daytime(int64_t ordinal, - asfreq_info *af_info) nogil: - return ordinal // (af_info.intraday_conversion_factor) +cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal // af_info.intraday_conversion_factor cdef inline int64_t transform_via_day(int64_t ordinal, @@ -1464,24 +1472,24 @@ def extract_freq(ndarray[object] values): cdef: Py_ssize_t i, n = len(values) - object p + object value for i in range(n): - p = values[i] + value = values[i] try: # now Timestamp / NaT has freq attr - if is_period_object(p): - return p.freq + if is_period_object(value): + return value.freq except AttributeError: pass raise ValueError('freq not specified and cannot be inferred') - # ----------------------------------------------------------------------- # period helpers + @cython.wraparound(False) @cython.boundscheck(False) cdef int64_t[:] localize_dt64arr_to_period(const int64_t[:] stamps, diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 5508b208de00a..dfe050c7bbff7 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -45,8 +45,7 @@ cdef dict _parse_code_table = {'y': 0, 'u': 22} -def array_strptime(object[:] values, object fmt, - bint exact=True, errors='raise'): +def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'): """ Calculates the datetime structs represented by the passed array of strings @@ -78,12 +77,9 @@ def array_strptime(object[:] values, object fmt, if fmt is not None: if '%W' in fmt or '%U' in fmt: if '%Y' not in fmt and '%y' not in fmt: - raise ValueError("Cannot use '%W' or '%U' without " - "day and year") - if ('%A' not in fmt and '%a' not in fmt and '%w' not - in fmt): - raise ValueError("Cannot use '%W' or '%U' without " - "day and year") + raise ValueError("Cannot use '%W' or '%U' without day and year") + if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without day and year") elif '%Z' in fmt and '%z' in fmt: raise ValueError("Cannot parse both %Z and %z") @@ -749,6 +745,6 @@ cdef parse_timezone_directive(str z): microseconds = int(gmtoff_remainder + gmtoff_remainder_padding) total_minutes = ((hours * 60) + minutes + (seconds // 60) + - (microseconds // 60000000)) + (microseconds // 60_000_000)) total_minutes = -total_minutes if z.startswith("-") else total_minutes return pytz.FixedOffset(total_minutes) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 35ee87e714fa8..07947f6677c04 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -196,7 +196,7 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 return arr @@ -217,7 +217,7 @@ cdef object get_dst_info(object tz): if cache_key is None: # e.g. pytz.FixedOffset, matplotlib.dates._UTC, # psycopg2.tz.FixedOffsetTimezone - num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None)