Skip to content

REF: implement PandasArray.pad_or_backfill #53827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2235,7 +2235,7 @@ def interpolate(
*,
method,
axis: int,
index: Index | None,
index: Index,
limit,
limit_direction,
limit_area,
Expand All @@ -2255,7 +2255,7 @@ def interpolate(
else:
out_data = self._ndarray.copy()

missing.interpolate_array_2d(
missing.interpolate_2d_inplace(
out_data,
method=method,
axis=axis,
Expand Down
41 changes: 38 additions & 3 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import (
TYPE_CHECKING,
Literal,
)

import numpy as np

Expand Down Expand Up @@ -32,6 +35,7 @@
from pandas._typing import (
AxisInt,
Dtype,
FillnaOptions,
NpDtype,
Scalar,
Self,
Expand Down Expand Up @@ -224,12 +228,42 @@ def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
fv = np.nan
return self._ndarray, fv

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    axis: int,
    limit: int | None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Forward-fill or backward-fill the underlying ndarray.

    Parameters
    ----------
    method : FillnaOptions
        Requested fill method. It is passed through
        ``missing.clean_fill_method`` before being handed to the filler.
    axis : int
        Forwarded unchanged to ``missing.pad_or_backfill_inplace``.
    limit : int or None
        Forwarded unchanged to ``missing.pad_or_backfill_inplace``.
    limit_area : {"inside", "outside"} or None, default None
        Forwarded unchanged to ``missing.pad_or_backfill_inplace``.
    copy : bool, default True
        If True, the fill is applied to a copy of ``self._ndarray`` and a
        new array wrapping that copy is returned. If False, the fill is
        applied to ``self._ndarray`` itself and ``self`` is returned.

    Returns
    -------
    Self
        A new array when ``copy`` is True, otherwise ``self``.
    """
    # Pick the buffer the in-place filler will write into.
    values = self._ndarray.copy() if copy else self._ndarray

    missing.pad_or_backfill_inplace(
        values,
        method=missing.clean_fill_method(method),
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )

    if copy:
        # The filled buffer is a fresh copy, so wrap it in a new instance.
        return type(self)._simple_new(values, dtype=self.dtype)
    # ``values`` is ``self._ndarray``; it was modified in place above.
    return self

def interpolate(
self,
*,
method,
axis: int,
index: Index | None,
index: Index,
limit,
limit_direction,
limit_area,
Expand All @@ -246,7 +280,8 @@ def interpolate(
else:
out_data = self._ndarray.copy()

missing.interpolate_array_2d(
# TODO: assert we have floating dtype?
missing.interpolate_2d_inplace(
out_data,
method=method,
axis=axis,
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
check_array_indexer,
unpack_tuple_and_ellipses,
)
from pandas.core.missing import interpolate_2d
from pandas.core.missing import pad_or_backfill_inplace
from pandas.core.nanops import check_below_min_count

from pandas.io.formats import printing
Expand Down Expand Up @@ -764,11 +764,11 @@ def fillna(
stacklevel=find_stack_level(),
)
new_values = np.asarray(self)
# interpolate_2d modifies new_values inplace
# error: Argument "method" to "interpolate_2d" has incompatible type
# "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected
# pad_or_backfill_inplace modifies new_values inplace
# error: Argument "method" to "pad_or_backfill_inplace" has incompatible
# type "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected
# "Literal['pad', 'backfill']"
interpolate_2d(
pad_or_backfill_inplace(
new_values, method=method, limit=limit # type: ignore[arg-type]
)
return type(self)(new_values, fill_value=self.fill_value)
Expand Down
39 changes: 27 additions & 12 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1352,7 +1352,7 @@ def interpolate(
inplace: bool = False,
limit: int | None = None,
limit_direction: Literal["forward", "backward", "both"] = "forward",
limit_area: str | None = None,
limit_area: Literal["inside", "outside"] | None = None,
fill_value: Any | None = None,
downcast: Literal["infer"] | None = None,
using_cow: bool = False,
Expand Down Expand Up @@ -1410,17 +1410,32 @@ def interpolate(

# Dispatch to the PandasArray method.
# We know self.array_values is a PandasArray bc EABlock overrides
new_values = cast(PandasArray, self.array_values).interpolate(
method=method,
axis=axis,
index=index,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value,
inplace=arr_inplace,
**kwargs,
)
if m is not None:
if fill_value is not None:
# similar to validate_fillna_kwargs
raise ValueError("Cannot pass both fill_value and method")

# TODO: warn about ignored kwargs, limit_direction, index...?
new_values = cast(PandasArray, self.array_values).pad_or_backfill(
method=method,
axis=axis,
limit=limit,
limit_area=limit_area,
copy=not arr_inplace,
)
else:
assert index is not None # for mypy
new_values = cast(PandasArray, self.array_values).interpolate(
method=method,
axis=axis,
index=index,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value,
inplace=arr_inplace,
**kwargs,
)
data = new_values._ndarray

nb = self.make_block_same_class(data, refs=refs)
Expand Down
59 changes: 3 additions & 56 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,60 +302,7 @@ def get_interp_index(method, index: Index) -> Index:
return index


def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: AxisInt = 0,
    index: Index | None = None,
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Dispatch to ``interpolate_2d`` or ``_interpolate_2d_with_fill``.

    ``method`` is first run through ``clean_fill_method``. If that succeeds
    with a non-None result, the call goes to ``interpolate_2d``; if it
    raises ``ValueError`` or returns None, the call goes to
    ``_interpolate_2d_with_fill`` with the original ``method``.

    Raises
    ------
    ValueError
        If ``method`` is accepted by ``clean_fill_method`` and ``fill_value``
        is also given.

    Notes
    -----
    Alters 'data' in-place.
    """
    try:
        fill_method = clean_fill_method(method)
    except ValueError:
        # Not recognised as a fill method; use the interpolation path.
        fill_method = None

    if fill_method is None:
        assert index is not None  # for mypy

        _interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )
        return

    if fill_value is not None:
        # similar to validate_fillna_kwargs
        raise ValueError("Cannot pass both fill_value and method")

    # index, limit_direction and **kwargs are not forwarded on this path.
    interpolate_2d(
        data,
        method=fill_method,
        axis=axis,
        limit=limit,
        # error: Argument "limit_area" to "interpolate_2d" has incompatible
        # type "Optional[str]"; expected "Optional[Literal['inside', 'outside']]"
        limit_area=limit_area,  # type: ignore[arg-type]
    )


def _interpolate_2d_with_fill(
def interpolate_2d_inplace(
data: np.ndarray, # floating dtype
index: Index,
axis: AxisInt,
Expand Down Expand Up @@ -845,7 +792,7 @@ def _interpolate_with_limit_area(
if last is None:
last = len(values)

interpolate_2d(
pad_or_backfill_inplace(
values,
method=method,
limit=limit,
Expand All @@ -861,7 +808,7 @@ def _interpolate_with_limit_area(
values[invalid] = np.nan


def interpolate_2d(
def pad_or_backfill_inplace(
values: np.ndarray,
method: Literal["pad", "backfill"] = "pad",
axis: AxisInt = 0,
Expand Down