Skip to content

REF/BUG: cast back to datetimelike in pad/backfill #40052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 27, 2021
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"""
from __future__ import annotations

from functools import partial
from functools import (
partial,
wraps,
)
from typing import (
TYPE_CHECKING,
Any,
Expand All @@ -22,15 +25,12 @@
from pandas._typing import (
ArrayLike,
Axis,
DtypeObj,
)
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import infer_dtype_from
from pandas.core.dtypes.common import (
ensure_float64,
is_array_like,
is_integer_dtype,
is_numeric_v_string_like,
needs_i8_conversion,
)
Expand Down Expand Up @@ -674,54 +674,53 @@ def interpolate_2d(
return result


def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
"""
Cast values to a dtype that algos.pad and algos.backfill can handle.
"""
# TODO: for int-dtypes we make a copy, but for everything else this
# alters the values in-place. Is this intentional?
def _fillna_prep(values, mask=None):
# boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d

if needs_i8_conversion(dtype):
values = values.view(np.int64)
if mask is None:
mask = isna(values)

elif is_integer_dtype(values) and not has_mask:
# NB: this check needs to come after the datetime64 check above
# has_mask check to avoid casting i8 values that have already
# been cast from PeriodDtype
values = ensure_float64(values)
mask = mask.view(np.uint8)
return mask

return values

def _datetimelike_compat(func):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just started (and I mean just) experimenting with having a numba version of the libs.

numpy datetime and timedeltas can be handled by numba. This means that _cast_values_for_fillna as removed here (but replaced with the decorator) and creating the view of the mask are unnecessary with numba. d449ca0

i suspect that many of the i8 conversions in the main codebase could become redundant (presumably numba deals with this)

with this in mind, would it be worth considering moving any i8 conversions into _libs? This may produce a cleaner _libs api and simplify the python code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh. and back to the PR. can you type the decorator to preserve the signature of the decorated function (ignoring that values to the decorator will be superset of values in the function, since we don't yet have type parameters for numpy arrays)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just started (and I mean just) experimenting with having a numba version of the libs.

neat!

with this in mind, would it be worth considering moving any i8 conversions into _libs? This may produce a cleaner _libs api and simplify the python code.

ill have to give that some thought. one wrinkle that comes to mind is that cdef functions dont tend to play nicely with decorators

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may need a python interface layer in _libs, but early days yet since don't know how will handle pandas types that are defined as ctypes, without a performance hit.

"""
Wrapper to handle datetime64 and timedelta64 dtypes.
"""

def _fillna_prep(values, mask=None):
# boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
dtype = values.dtype
@wraps(func)
def new_func(values, limit=None, mask=None):
if needs_i8_conversion(values.dtype):
if mask is None:
# This needs to occur before casting to int64
mask = isna(values)

has_mask = mask is not None
if not has_mask:
# This needs to occur before datetime/timedeltas are cast to int64
mask = isna(values)
result = func(values.view("i8"), limit=limit, mask=mask)
return result.view(values.dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since the cython funcs operate inplace, is it possible to just return values. or is a view of a view nbd.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yah lets save this for a future step


values = _cast_values_for_fillna(values, dtype, has_mask)
return func(values, limit=limit, mask=mask)

mask = mask.view(np.uint8)
return values, mask
return new_func


@_datetimelike_compat
def _pad_1d(values, limit=None, mask=None):
values, mask = _fillna_prep(values, mask)
mask = _fillna_prep(values, mask)
algos.pad_inplace(values, mask, limit=limit)
return values


@_datetimelike_compat
def _backfill_1d(values, limit=None, mask=None):
values, mask = _fillna_prep(values, mask)
mask = _fillna_prep(values, mask)
algos.backfill_inplace(values, mask, limit=limit)
return values


@_datetimelike_compat
def _pad_2d(values, limit=None, mask=None):
values, mask = _fillna_prep(values, mask)
mask = _fillna_prep(values, mask)

if np.all(values.shape):
algos.pad_2d_inplace(values, mask, limit=limit)
Expand All @@ -731,8 +730,9 @@ def _pad_2d(values, limit=None, mask=None):
return values


@_datetimelike_compat
def _backfill_2d(values, limit=None, mask=None):
values, mask = _fillna_prep(values, mask)
mask = _fillna_prep(values, mask)

if np.all(values.shape):
algos.backfill_2d_inplace(values, mask, limit=limit)
Expand Down