Skip to content

REF: avoid DTA/PA methods in SemiMonthOffset.apply_index #34783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 49 additions & 108 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2269,56 +2269,62 @@ cdef class SemiMonthOffset(SingleConstructorOffset):
raise NotImplementedError(self)

@apply_index_wraps
@cython.wraparound(False)
@cython.boundscheck(False)
def apply_index(self, dtindex):
# determine how many days away from the 1st of the month we are

dti = dtindex
i8other = dtindex.asi8
days_from_start = dtindex.to_perioddelta("M").asi8
delta = Timedelta(days=self.day_of_month - 1).value

# get boolean array for each element before the day_of_month
before_day_of_month = days_from_start < delta

# get boolean array for each element after the day_of_month
after_day_of_month = days_from_start > delta

# determine the correct n for each date in dtindex
roll = self._get_roll(i8other, before_day_of_month, after_day_of_month)

# isolate the time since it will be stripped away on the next line
time = (i8other % DAY_NANOS).view("timedelta64[ns]")

# apply the correct number of months

# integer-array addition on PeriodIndex is deprecated,
# so we use _addsub_int_array directly
asper = dtindex.to_period("M")
cdef:
int64_t[:] i8other = dtindex.view("i8")
Py_ssize_t i, count = len(i8other)
int64_t val
int64_t[:] out = np.empty(count, dtype="i8")
npy_datetimestruct dts
int months, to_day, nadj, n = self.n
int days_in_month, day, anchor_dom = self.day_of_month
bint is_start = isinstance(self, SemiMonthBegin)

shifted = asper._addsub_int_array(roll // 2, operator.add)
dtindex = type(dti)(shifted.to_timestamp())
dt64other = np.asarray(dtindex)
with nogil:
for i in range(count):
val = i8other[i]
if val == NPY_NAT:
out[i] = NPY_NAT
continue

# apply the correct day
dt64result = self._apply_index_days(dt64other, roll)
dt64_to_dtstruct(val, &dts)
day = dts.day

# Adjust so that we are always looking at self.day_of_month,
# incrementing/decrementing n if necessary.
nadj = roll_convention(day, n, anchor_dom)

days_in_month = get_days_in_month(dts.year, dts.month)
# For SemiMonthBegin on other.day == 1 and
# SemiMonthEnd on other.day == days_in_month,
# shifting `other` to `self.day_of_month` _always_ requires
# incrementing/decrementing `n`, regardless of whether it is
# initially positive.
if is_start and (n <= 0 and day == 1):
nadj -= 1
elif (not is_start) and (n > 0 and day == days_in_month):
nadj += 1

if is_start:
# See also: SemiMonthBegin._apply
months = nadj // 2 + nadj % 2
to_day = 1 if nadj % 2 else anchor_dom

return dt64result + time
else:
# See also: SemiMonthEnd._apply
months = nadj // 2
to_day = 31 if nadj % 2 else anchor_dom

def _get_roll(self, i8other, before_day_of_month, after_day_of_month):
"""
Return an array with the correct n for each date in dtindex.
dts.year = year_add_months(dts, months)
dts.month = month_add_months(dts, months)
days_in_month = get_days_in_month(dts.year, dts.month)
dts.day = min(to_day, days_in_month)

The roll array is based on the fact that dtindex gets rolled back to
the first day of the month.
"""
# before_day_of_month and after_day_of_month are ndarray[bool]
raise NotImplementedError
out[i] = dtstruct_to_dt64(&dts)

def _apply_index_days(self, dt64other, roll):
"""
Apply the correct day for each date in dt64other.
"""
raise NotImplementedError
return out.base


cdef class SemiMonthEnd(SemiMonthOffset):
Expand Down Expand Up @@ -2347,39 +2353,6 @@ cdef class SemiMonthEnd(SemiMonthOffset):
day = 31 if n % 2 else self.day_of_month
return shift_month(other, months, day)

def _get_roll(self, i8other, before_day_of_month, after_day_of_month):
# before_day_of_month and after_day_of_month are ndarray[bool]
n = self.n
is_month_end = get_start_end_field(i8other, "is_month_end")
if n > 0:
roll_end = np.where(is_month_end, 1, 0)
roll_before = np.where(before_day_of_month, n, n + 1)
roll = roll_end + roll_before
elif n == 0:
roll_after = np.where(after_day_of_month, 2, 0)
roll_before = np.where(~after_day_of_month, 1, 0)
roll = roll_before + roll_after
else:
roll = np.where(after_day_of_month, n + 2, n + 1)
return roll

def _apply_index_days(self, dt64other, roll):
"""
Add days portion of offset to dt64other.

Parameters
----------
dt64other : ndarray[datetime64[ns]]
roll : ndarray[int64_t]

Returns
-------
ndarray[datetime64[ns]]
"""
nanos = (roll % 2) * Timedelta(days=self.day_of_month).value
dt64other += nanos.astype("timedelta64[ns]")
return dt64other + Timedelta(days=-1)


cdef class SemiMonthBegin(SemiMonthOffset):
"""
Expand All @@ -2405,38 +2378,6 @@ cdef class SemiMonthBegin(SemiMonthOffset):
day = 1 if n % 2 else self.day_of_month
return shift_month(other, months, day)

def _get_roll(self, i8other, before_day_of_month, after_day_of_month):
# before_day_of_month and after_day_of_month are ndarray[bool]
n = self.n
is_month_start = get_start_end_field(i8other, "is_month_start")
if n > 0:
roll = np.where(before_day_of_month, n, n + 1)
elif n == 0:
roll_start = np.where(is_month_start, 0, 1)
roll_after = np.where(after_day_of_month, 1, 0)
roll = roll_start + roll_after
else:
roll_after = np.where(after_day_of_month, n + 2, n + 1)
roll_start = np.where(is_month_start, -1, 0)
roll = roll_after + roll_start
return roll

def _apply_index_days(self, dt64other, roll):
"""
Add days portion of offset to dt64other.

Parameters
----------
dt64other : ndarray[datetime64[ns]]
roll : ndarray[int64_t]

Returns
-------
ndarray[datetime64[ns]]
"""
nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value
return dt64other + nanos.astype("timedelta64[ns]")


# ---------------------------------------------------------------------
# Week-Based Offset Classes
Expand Down