-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: some code cleanups in pandas/_libs/ #31808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
1ea71f8
dcd8c38
8634a37
362e1f7
e151183
2b631b5
b369447
4f600f9
208bc03
4cbec3d
0b47e28
eaf2d49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -448,7 +448,7 @@ cdef class BlockIndex(SparseIndex): | |
ylen = y.blengths | ||
|
||
# block may be split, but can't exceed original len / 2 + 1 | ||
max_len = int(min(self.length, y.length) / 2) + 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are there no cases where […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, in order for […] number // divider + (number % divider > 0) For example: number = 42.01
divider = 8
number / divider # 5.25125
number // divider # 5.0
int(number / divider) # 5
number // divider + (number % divider > 0) # 6.0 |
||
max_len = min(self.length, y.length) // 2 + 1 | ||
out_bloc = np.empty(max_len, dtype=np.int32) | ||
out_blen = np.empty(max_len, dtype=np.int32) | ||
|
||
|
@@ -672,7 +672,7 @@ cdef class BlockUnion(BlockMerge): | |
ystart = self.ystart | ||
yend = self.yend | ||
|
||
max_len = int(min(self.x.length, self.y.length) / 2) + 1 | ||
max_len = min(self.x.length, self.y.length) // 2 + 1 | ||
out_bloc = np.empty(max_len, dtype=np.int32) | ||
out_blen = np.empty(max_len, dtype=np.int32) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,6 @@ | ||
from datetime import datetime | ||
|
||
from cpython.object cimport ( | ||
PyObject_RichCompareBool, | ||
Py_EQ, Py_NE) | ||
from cpython.object cimport PyObject_RichCompareBool, Py_EQ, Py_NE | ||
|
||
from numpy cimport int64_t, import_array, ndarray | ||
import numpy as np | ||
|
@@ -14,15 +12,25 @@ from libc.string cimport strlen, memset | |
|
||
import cython | ||
|
||
from cpython.datetime cimport (PyDateTime_Check, PyDelta_Check, PyDate_Check, | ||
PyDateTime_IMPORT) | ||
from cpython.datetime cimport ( | ||
PyDate_Check, | ||
PyDateTime_Check, | ||
PyDateTime_IMPORT, | ||
PyDelta_Check, | ||
) | ||
# import datetime C API | ||
PyDateTime_IMPORT | ||
|
||
from pandas._libs.tslibs.np_datetime cimport ( | ||
npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, | ||
pandas_datetime_to_datetimestruct, check_dts_bounds, | ||
NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us) | ||
npy_datetimestruct, | ||
dtstruct_to_dt64, | ||
dt64_to_dtstruct, | ||
pandas_datetime_to_datetimestruct, | ||
check_dts_bounds, | ||
NPY_DATETIMEUNIT, | ||
NPY_FR_D, | ||
NPY_FR_us, | ||
) | ||
|
||
cdef extern from "src/datetime/np_datetime.h": | ||
int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, | ||
|
@@ -37,12 +45,15 @@ from pandas._libs.tslibs.timedeltas import Timedelta | |
from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds | ||
|
||
cimport pandas._libs.tslibs.ccalendar as ccalendar | ||
from pandas._libs.tslibs.ccalendar cimport ( | ||
dayofweek, get_day_of_year, is_leapyear) | ||
from pandas._libs.tslibs.ccalendar cimport dayofweek, get_day_of_year, is_leapyear | ||
from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS | ||
from pandas._libs.tslibs.frequencies cimport ( | ||
get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, | ||
get_rule_month) | ||
get_base_alias, | ||
get_freq_code, | ||
get_freq_str, | ||
get_rule_month, | ||
get_to_timestamp_base, | ||
) | ||
from pandas._libs.tslibs.parsing import parse_time_string | ||
from pandas._libs.tslibs.resolution import Resolution | ||
from pandas._libs.tslibs.nattype import nat_strings | ||
|
@@ -55,7 +66,7 @@ from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal | |
|
||
cdef: | ||
enum: | ||
INT32_MIN = -2147483648 | ||
INT32_MIN = -2_147_483_648 | ||
|
||
|
||
ctypedef struct asfreq_info: | ||
|
@@ -131,9 +142,7 @@ cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: | |
int col = max_value(from_index, to_index) | ||
# row or col < 6 means frequency strictly lower than Daily, which | ||
# do not use daytime_conversion_factors | ||
if row < 6: | ||
return 0 | ||
elif col < 6: | ||
if row < 6 or col < 6: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doesn't this need brackets to be done in the correct order? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche I have reverted this, as I was doing it wrong before: I hadn't considered the possibility that row can be, say, 12 while col is 2. Reverted in 208bc03 |
||
return 0 | ||
return daytime_conversion_factor_matrix[row - 6][col - 6] | ||
|
||
|
@@ -179,8 +188,7 @@ cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil: | |
return <freq_conv_func>asfreq_MtoB | ||
elif from_group == FR_WK: | ||
return <freq_conv_func>asfreq_WtoB | ||
elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, | ||
FR_MS, FR_US, FR_NS]: | ||
elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: | ||
return <freq_conv_func>asfreq_DTtoB | ||
else: | ||
return <freq_conv_func>nofunc | ||
|
@@ -289,17 +297,15 @@ cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, | |
return DtoB_weekday(unix_date) | ||
|
||
|
||
cdef inline int64_t upsample_daytime(int64_t ordinal, | ||
asfreq_info *af_info) nogil: | ||
cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: | ||
if (af_info.is_end): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you wanted to remove the parens here, I wouldn't object |
||
return (ordinal + 1) * af_info.intraday_conversion_factor - 1 | ||
else: | ||
return ordinal * af_info.intraday_conversion_factor | ||
|
||
|
||
cdef inline int64_t downsample_daytime(int64_t ordinal, | ||
asfreq_info *af_info) nogil: | ||
return ordinal // (af_info.intraday_conversion_factor) | ||
cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: | ||
return ordinal // af_info.intraday_conversion_factor | ||
|
||
|
||
cdef inline int64_t transform_via_day(int64_t ordinal, | ||
|
@@ -1464,24 +1470,24 @@ def extract_freq(ndarray[object] values): | |
|
||
cdef: | ||
Py_ssize_t i, n = len(values) | ||
object p | ||
object value | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
for i in range(n): | ||
p = values[i] | ||
value = values[i] | ||
|
||
try: | ||
# now Timestamp / NaT has freq attr | ||
if is_period_object(p): | ||
return p.freq | ||
if is_period_object(value): | ||
return value.freq | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 for avoiding a 1-character variable name |
||
except AttributeError: | ||
pass | ||
|
||
raise ValueError('freq not specified and cannot be inferred') | ||
|
||
|
||
# ----------------------------------------------------------------------- | ||
# period helpers | ||
|
||
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
cdef int64_t[:] localize_dt64arr_to_period(const int64_t[:] stamps, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,8 +45,7 @@ cdef dict _parse_code_table = {'y': 0, | |
'u': 22} | ||
|
||
|
||
def array_strptime(object[:] values, object fmt, | ||
bint exact=True, errors='raise'): | ||
def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'): | ||
""" | ||
Calculates the datetime structs represented by the passed array of strings | ||
|
||
|
@@ -78,12 +77,10 @@ def array_strptime(object[:] values, object fmt, | |
if fmt is not None: | ||
if '%W' in fmt or '%U' in fmt: | ||
if '%Y' not in fmt and '%y' not in fmt: | ||
raise ValueError("Cannot use '%W' or '%U' without " | ||
"day and year") | ||
raise ValueError("Cannot use '%W' or '%U' without day and year") | ||
if ('%A' not in fmt and '%a' not in fmt and '%w' not | ||
in fmt): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the |
||
raise ValueError("Cannot use '%W' or '%U' without " | ||
"day and year") | ||
raise ValueError("Cannot use '%W' or '%U' without day and year") | ||
elif '%Z' in fmt and '%z' in fmt: | ||
raise ValueError("Cannot parse both %Z and %z") | ||
|
||
|
@@ -749,6 +746,6 @@ cdef parse_timezone_directive(str z): | |
microseconds = int(gmtoff_remainder + gmtoff_remainder_padding) | ||
|
||
total_minutes = ((hours * 60) + minutes + (seconds // 60) + | ||
(microseconds // 60000000)) | ||
(microseconds // 60_000_000)) | ||
total_minutes = -total_minutes if z.startswith("-") else total_minutes | ||
return pytz.FixedOffset(total_minutes) |
Uh oh!
There was an error while loading. Please reload this page.