From 1470dd62488f200d9f972b492bb6bbe08fb38e8e Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 26 Mar 2020 23:43:32 +0100 Subject: [PATCH 1/3] Fix unwanted type casting while replacing values in a DataFrame --- pandas/core/internals/blocks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a8cdb554edf03..660c70b64a961 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -734,7 +734,7 @@ def replace( # try again with a compatible block block = self.astype(object) - return block.replace( + block_replaced = block.replace( to_replace=to_replace, value=value, inplace=inplace, @@ -742,6 +742,9 @@ def replace( regex=regex, convert=convert, ) + return [ + inner_elem.convert() for elem in block_replaced for inner_elem in elem + ] values = self.values if lib.is_scalar(to_replace) and isinstance(values, np.ndarray): From ec40b4117e2c0067e10dc113e83da036eb64011d Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 27 Mar 2020 16:52:38 +0100 Subject: [PATCH 2/3] Add tests and fix small Bug --- pandas/core/internals/blocks.py | 14 +++++++++++--- pandas/tests/frame/methods/test_replace.py | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 660c70b64a961..1aa5d991124f7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -742,9 +742,17 @@ def replace( regex=regex, convert=convert, ) - return [ - inner_elem.convert() for elem in block_replaced for inner_elem in elem - ] + blocks_converted = [] + for ls_elem in block_replaced: + # If a replace was executed, block_replaced is a list of lists; + # if no replace was necessary, block_replaced is only a list + if is_list_like(ls_elem): + blocks_converted.extend( + [sub_block.convert() for sub_block in ls_elem] + ) + else: + blocks_converted.extend([ls_elem.convert()]) + return
blocks_converted values = self.values if lib.is_scalar(to_replace) and isinstance(values, np.ndarray): diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index ee89562261b19..1367df2203db1 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -22,6 +22,25 @@ def mix_abc() -> Dict[str, List[Union[float, str]]]: class TestDataFrameReplace: + def test_replace_without_type_cast(self): + # GH 32988: Fix unwanted type casting while replacing + result = ( + pd.DataFrame(np.eye(2)) + .replace(to_replace=[None, -np.inf, np.inf], value=pd.NA) + .dtypes + ) + expected = pd.Series([np.float64, np.float64]) + tm.assert_series_equal(result, expected) + + result = pd.DataFrame(np.eye(2)).replace( + to_replace=[None, -np.inf, np.inf, 1.0], value=pd.NA + ) + expected_dtypes = pd.Series([np.float64, np.float64]) + + expected = pd.DataFrame({0: [np.nan, 0.0], 1: [0.0, np.nan]}) + tm.assert_series_equal(result.dtypes, expected_dtypes) + tm.assert_frame_equal(result, expected) + def test_replace_inplace(self, datetime_frame, float_string_frame): datetime_frame["A"][:5] = np.nan datetime_frame["A"][-5:] = np.nan From 126d6d7cc73c49a767ff0370c8cceb0323ba010c Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 27 Mar 2020 17:43:27 +0100 Subject: [PATCH 3/3] Change comparison from dtypes to values --- pandas/tests/frame/methods/test_replace.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 1367df2203db1..c1035dc8cf2ab 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -24,21 +24,16 @@ def mix_abc() -> Dict[str, List[Union[float, str]]]: class TestDataFrameReplace: def test_replace_without_type_cast(self): # GH 32988: Fix unwanted type casting while replacing - result = ( - pd.DataFrame(np.eye(2)) -
.replace(to_replace=[None, -np.inf, np.inf], value=pd.NA) - .dtypes + result = pd.DataFrame(np.eye(2)).replace( + to_replace=[None, -np.inf, np.inf], value=pd.NA ) - expected = pd.Series([np.float64, np.float64]) - tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result, pd.DataFrame(np.eye(2))) result = pd.DataFrame(np.eye(2)).replace( to_replace=[None, -np.inf, np.inf, 1.0], value=pd.NA ) - expected_dtypes = pd.Series([np.float64, np.float64]) expected = pd.DataFrame({0: [np.nan, 0.0], 1: [0.0, np.nan]}) - tm.assert_series_equal(result.dtypes, expected_dtypes) tm.assert_frame_equal(result, expected) def test_replace_inplace(self, datetime_frame, float_string_frame):