From 2cd4b3969718ccf7ff621adc27e434c6352116c7 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Mar 2021 15:08:44 -0700 Subject: [PATCH 1/2] REF: share to_native_types with ArrayManager --- pandas/core/internals/array_manager.py | 3 +- pandas/core/internals/blocks.py | 144 +++++++++++++------------ 2 files changed, 77 insertions(+), 70 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index ef9981f40efe1..460d19290d56f 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -88,6 +88,7 @@ from pandas.core.internals.blocks import ( ensure_block_shape, new_block, + to_native_types, ) if TYPE_CHECKING: @@ -634,7 +635,7 @@ def replace_list( ) def to_native_types(self, **kwargs): - return self.apply_with_block("to_native_types", **kwargs) + return self.apply(to_native_types, **kwargs) @property def is_mixed_type(self) -> bool: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3fd1ebaca19f0..889454a2cf6df 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -97,6 +97,7 @@ FloatingArray, IntegerArray, PandasArray, + TimedeltaArray, ) from pandas.core.base import PandasObject import pandas.core.common as com @@ -654,22 +655,8 @@ def should_store(self, value: ArrayLike) -> bool: def to_native_types(self, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format """ - values = self.values - - mask = isna(values) - itemsize = writers.word_len(na_rep) - - if not self.is_object and not quoting and itemsize: - values = values.astype(str) - if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: - # enlarge for the na_rep - values = values.astype(f" np.ndarray: def array_values(self) -> ExtensionArray: return self.values - def to_native_types(self, na_rep="nan", quoting=None, **kwargs): - """override to use ExtensionArray astype for the conversion""" - values = self.values - mask = isna(values) - - new_values = np.asarray(values.astype(object)) - new_values[mask] = na_rep - return self.make_block(new_values) - def take_nd( self, indexer, @@ -1808,41 +1786,6 @@ def is_bool(self): class FloatBlock(NumericBlock): __slots__ = () - def to_native_types( - self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs - ): - """ convert to our native types format """ - values = self.values - - # see gh-13418: no special formatting is desired at the - # output (important for appropriate 'quoting' behaviour), - # so do not pass it through the FloatArrayFormatter - if float_format is None and decimal == ".": - mask = isna(values) - - if not quoting: - values = values.astype(str) - else: - values = np.array(values, dtype="object") - - values[mask] = na_rep - values = values.astype(object, copy=False) - return self.make_block(values) - - from pandas.io.formats.format import FloatArrayFormatter - - formatter = FloatArrayFormatter( - values, - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - fixed_width=False, - ) - res = formatter.get_result_as_array() - res = res.astype(object, copy=False) - return self.make_block(res) - class NDArrayBackedExtensionBlock(HybridMixin, Block): """ @@ -1966,14 +1909,6 @@ def _holder(self): def fill_value(self): return na_value_for_dtype(self.dtype) - def to_native_types(self, na_rep="NaT", **kwargs): - """ convert to our native types format """ - arr = self.array_values() - - result = arr._format_native_types(na_rep=na_rep, **kwargs) - result = result.astype(object, copy=False) - return self.make_block(result) - class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () @@ -1999,7 +1934,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): internal_values = Block.internal_values _can_hold_element = DatetimeBlock._can_hold_element - to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask @@ -2316,3 +2250,75 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # We can't, and don't need to, reshape. values = np.asarray(values).reshape(1, -1) return values + + +def to_native_types( + values: ArrayLike, + *, + na_rep="nan", + quoting=None, + float_format=None, + decimal=".", + **kwargs, +) -> np.ndarray: + """ convert to our native types format """ + values = ensure_wrapped_if_datetimelike(values) + + if isinstance(values, (DatetimeArray, TimedeltaArray)): + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + + elif isinstance(values, ExtensionArray): + mask = isna(values) + + new_values = np.asarray(values.astype(object)) + new_values[mask] = na_rep + return new_values + + elif values.dtype.kind == "f": + # see GH#13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + res = formatter.get_result_as_array() + res = res.astype(object, copy=False) + return res + + else: + + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if values.dtype != _dtype_obj and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f" Date: Wed, 17 Mar 2021 15:48:02 -0700 Subject: [PATCH 2/2] REF: share fill_value --- pandas/core/internals/blocks.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 889454a2cf6df..99e54bace8915 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -261,9 +261,11 @@ def get_block_values_for_json(self) -> np.ndarray: # TODO(EA2D): reshape will be unnecessary with 2D EAs return np.asarray(self.values).reshape(self.shape) + @final @property def fill_value(self): - return np.nan + # Used in reindex_indexer + return na_value_for_dtype(self.dtype, compat=False) @property def mgr_locs(self) -> BlockPlacement: @@ -653,6 +655,7 @@ def should_store(self, value: ArrayLike) -> bool: """ return is_dtype_equal(value.dtype, self.dtype) + @final def to_native_types(self, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format """ result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) @@ -1485,11 +1488,6 @@ def _holder(self): # For extension blocks, the holder is values-dependent. return type(self.values) - @property - def fill_value(self): - # Used in reindex_indexer - return self.values.dtype.na_value - @property def _can_hold_na(self): # The default ExtensionArray._can_hold_na is True @@ -1905,10 +1903,6 @@ def array_values(self): def _holder(self): return type(self.array_values()) - @property - def fill_value(self): - return na_value_for_dtype(self.dtype) - class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = ()