Skip to content

REF: remove numeric arg from NDFrame._convert #50011

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Dec 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,8 @@ def coerce_indexer_dtype(indexer, categories) -> np.ndarray:

def soft_convert_objects(
values: np.ndarray,
*,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
period: bool = True,
copy: bool = True,
Expand All @@ -968,7 +968,6 @@ def soft_convert_objects(
----------
values : np.ndarray[object]
datetime : bool, default True
numeric: bool, default True
timedelta : bool, default True
period : bool, default True
copy : bool, default True
Expand All @@ -978,16 +977,15 @@ def soft_convert_objects(
np.ndarray or ExtensionArray
"""
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
validate_bool_kwarg(copy, "copy")

conversion_count = sum((datetime, numeric, timedelta))
conversion_count = sum((datetime, timedelta))
if conversion_count == 0:
raise ValueError("At least one of datetime, numeric or timedelta must be True.")
raise ValueError("At least one of datetime or timedelta must be True.")

# Soft conversions
if datetime or timedelta:
if datetime or timedelta or period:
# GH 20380, when datetime is beyond year 2262, hence outside
# bound of nanosecond-resolution 64-bit integers.
converted = lib.maybe_convert_objects(
Expand All @@ -999,13 +997,6 @@ def soft_convert_objects(
if converted is not values:
return converted

if numeric and is_object_dtype(values.dtype):
converted, _ = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)

# If all NaNs, then do not-alter
values = converted if not isna(converted).all() else values
values = values.copy() if copy else values

return values


Expand Down
12 changes: 2 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6318,8 +6318,8 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT:
@final
def _convert(
self: NDFrameT,
*,
datetime: bool_t = False,
numeric: bool_t = False,
timedelta: bool_t = False,
) -> NDFrameT:
"""
Expand All @@ -6329,9 +6329,6 @@ def _convert(
----------
datetime : bool, default False
If True, convert to date where possible.
numeric : bool, default False
If True, attempt to convert to numbers (including strings), with
unconvertible values becoming NaN.
timedelta : bool, default False
If True, convert to timedelta where possible.

Expand All @@ -6340,12 +6337,10 @@ def _convert(
converted : same as input object
"""
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
return self._constructor(
self._mgr.convert(
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
copy=True,
)
Expand Down Expand Up @@ -6390,11 +6385,8 @@ def infer_objects(self: NDFrameT) -> NDFrameT:
A int64
dtype: object
"""
# numeric=False necessary to only soft convert;
# python objects will still be converted to
# native numpy numeric types
return self._constructor(
self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True)
self._mgr.convert(datetime=True, timedelta=True, copy=True)
).__finalize__(self, method="infer_objects")

@final
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,9 @@ def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:

def convert(
self: T,
*,
copy: bool = True,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
) -> T:
def _convert(arr):
Expand All @@ -389,7 +389,6 @@ def _convert(arr):
return soft_convert_objects(
arr,
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
copy=copy,
)
Expand Down
17 changes: 6 additions & 11 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,9 +429,7 @@ def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
# but ATM it breaks too much existing code.
# split and convert the blocks

return extend_blocks(
[blk.convert(datetime=True, numeric=False) for blk in blocks]
)
return extend_blocks([blk.convert(datetime=True) for blk in blocks])

if downcast is None:
return blocks
Expand All @@ -451,9 +449,9 @@ def _downcast_2d(self, dtype) -> list[Block]:

def convert(
self,
*,
copy: bool = True,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
) -> list[Block]:
"""
Expand Down Expand Up @@ -570,7 +568,7 @@ def replace(
if not (self.is_object and value is None):
# if the user *explicitly* gave None, we keep None, otherwise
# may downcast to NaN
blocks = blk.convert(numeric=False, copy=False)
blocks = blk.convert(copy=False)
else:
blocks = [blk]
return blocks
Expand Down Expand Up @@ -642,7 +640,7 @@ def _replace_regex(
replace_regex(new_values, rx, value, mask)

block = self.make_block(new_values)
return block.convert(numeric=False, copy=False)
return block.convert(copy=False)

@final
def replace_list(
Expand Down Expand Up @@ -712,9 +710,7 @@ def replace_list(
)
if convert and blk.is_object and not all(x is None for x in dest_list):
# GH#44498 avoid unwanted cast-back
result = extend_blocks(
[b.convert(numeric=False, copy=True) for b in result]
)
result = extend_blocks([b.convert(copy=True) for b in result])
new_rb.extend(result)
rb = new_rb
return rb
Expand Down Expand Up @@ -1969,9 +1965,9 @@ def reduce(self, func) -> list[Block]:
@maybe_split
def convert(
self,
*,
copy: bool = True,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
) -> list[Block]:
"""
Expand All @@ -1987,7 +1983,6 @@ def convert(
res_values = soft_convert_objects(
values,
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
copy=copy,
)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,16 +443,15 @@ def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:

def convert(
self: T,
*,
copy: bool = True,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
) -> T:
return self.apply(
"convert",
copy=copy,
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
)

Expand Down
23 changes: 3 additions & 20 deletions pandas/tests/frame/methods/test_convert.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import numpy as np
import pytest

from pandas import (
DataFrame,
Series,
)
from pandas import DataFrame
import pandas._testing as tm


Expand All @@ -21,17 +18,11 @@ def test_convert_objects(self, float_string_frame):
float_string_frame["I"] = "1"

# add in some items that will be nan
length = len(float_string_frame)
float_string_frame["J"] = "1."
float_string_frame["K"] = "1"
float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
converted = float_string_frame._convert(datetime=True, numeric=True)
assert converted["H"].dtype == "float64"
assert converted["I"].dtype == "int64"
assert converted["J"].dtype == "float64"
assert converted["K"].dtype == "float64"
assert len(converted["J"].dropna()) == length - 5
assert len(converted["K"].dropna()) == length - 5
converted = float_string_frame._convert(datetime=True)
tm.assert_frame_equal(converted, float_string_frame)

# via astype
converted = float_string_frame.copy()
Expand All @@ -45,14 +36,6 @@ def test_convert_objects(self, float_string_frame):
with pytest.raises(ValueError, match="invalid literal"):
converted["H"].astype("int32")

def test_convert_mixed_single_column(self):
# GH#4119, not converting a mixed type (e.g. floats and object)
# mixed in a single column
df = DataFrame({"s": Series([1, "na", 3, 4])})
result = df._convert(datetime=True, numeric=True)
expected = DataFrame({"s": Series([1, np.nan, 3, 4])})
tm.assert_frame_equal(result, expected)

def test_convert_objects_no_conversion(self):
mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]})
mixed2 = mixed1._convert(datetime=True)
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,14 +557,6 @@ def test_astype_assignment(self):
)
tm.assert_frame_equal(df, expected)

df = df_orig.copy()
with tm.assert_produces_warning(FutureWarning, match=msg):
df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True)
expected = DataFrame(
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
tm.assert_frame_equal(df, expected)

# GH5702 (loc)
df = df_orig.copy()
with tm.assert_produces_warning(FutureWarning, match=msg):
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,9 +599,9 @@ def _compare(old_mgr, new_mgr):
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert(numeric=True)
assert new_mgr.iget(0).dtype == np.int64
assert new_mgr.iget(1).dtype == np.float64
new_mgr = mgr.convert()
assert new_mgr.iget(0).dtype == np.object_
assert new_mgr.iget(1).dtype == np.object_
assert new_mgr.iget(2).dtype == np.object_
assert new_mgr.iget(3).dtype == np.int64
assert new_mgr.iget(4).dtype == np.float64
Expand All @@ -612,9 +612,9 @@ def _compare(old_mgr, new_mgr):
mgr.iset(0, np.array(["1"] * N, dtype=np.object_))
mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert(numeric=True)
assert new_mgr.iget(0).dtype == np.int64
assert new_mgr.iget(1).dtype == np.float64
new_mgr = mgr.convert()
assert new_mgr.iget(0).dtype == np.object_
assert new_mgr.iget(1).dtype == np.object_
assert new_mgr.iget(2).dtype == np.object_
assert new_mgr.iget(3).dtype == np.int32
assert new_mgr.iget(4).dtype == np.bool_
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ def try_remove_ws(x):
]
dfnew = df.applymap(try_remove_ws).replace(old, new)
gtnew = ground_truth.applymap(try_remove_ws)
converted = dfnew._convert(datetime=True, numeric=True)
converted = dfnew._convert(datetime=True)
date_cols = ["Closing Date", "Updated Date"]
converted[date_cols] = converted[date_cols].apply(to_datetime)
tm.assert_frame_equal(converted, gtnew)
Expand Down
Loading