From e2ee3f3cfc5663db9e2693f90920d04612975178 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 14 Jun 2023 16:44:36 -0700 Subject: [PATCH 1/2] REF: move values_for_json to EA --- pandas/core/arrays/base.py | 4 ++++ pandas/core/arrays/datetimelike.py | 6 ++++++ pandas/core/internals/blocks.py | 16 ---------------- pandas/core/internals/managers.py | 2 +- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 27eb7994d3ccb..554e4a3b385b4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1443,6 +1443,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): # Non-Optimized Default Methods; in the case of the private methods here, # these are not guaranteed to be stable across pandas versions. + def _values_for_json(self) -> np.ndarray: + # TODO: document! + return np.asarray(self) + def _hash_pandas_object( self, *, encoding: str, hash_key: str, categorize: bool ) -> npt.NDArray[np.uint64]: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ea085b3d1f6ab..2c53a605185ec 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2186,6 +2186,12 @@ def _with_freq(self, freq) -> Self: # -------------------------------------------------------------- # ExtensionArray Interface + def _values_for_json(self) -> np.ndarray: + # Small performance bump vs the base class which calls np.asarray(self) + if isinstance(self.dtype, np.dtype): + return self._ndarray + return super()._values_for_json() + def factorize( self, use_na_sentinel: bool = True, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 981e29df2c323..eaeaa99fc6370 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1602,9 +1602,6 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: """ raise AbstractMethodError(self) - def values_for_json(self) -> np.ndarray: - raise AbstractMethodError(self) - class EABackedBlock(Block): """ @@ -1835,9 +1832,6 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(values).reshape(self.shape) - def values_for_json(self) -> np.ndarray: - return np.asarray(self.values) - def interpolate( self, *, @@ -2145,9 +2139,6 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: return self.values.astype(_dtype_obj) return self.values - def values_for_json(self) -> np.ndarray: - return self.values - @cache_readonly def is_numeric(self) -> bool: # type: ignore[override] dtype = self.values.dtype @@ -2245,9 +2236,6 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock): is_numeric = False values: DatetimeArray | TimedeltaArray - def values_for_json(self) -> np.ndarray: - return self.values._ndarray - def interpolate( self, *, @@ -2304,10 +2292,6 @@ class DatetimeTZBlock(DatetimeLikeBlock): _validate_ndim = True _can_consolidate = False - # Don't use values_for_json from DatetimeLikeBlock since it is - # an invalid optimization here(drop the tz) - values_for_json = NDArrayBackedExtensionBlock.values_for_json - # ----------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a7c0536c66a4..b8b490c5bb56e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1110,7 +1110,7 @@ def column_arrays(self) -> list[np.ndarray]: for blk in self.blocks: mgr_locs = blk._mgr_locs - values = blk.values_for_json() + values = blk.array_values._values_for_json() if values.ndim == 1: # TODO(EA2D): special casing not needed with 2D EAs result[mgr_locs[0]] = values From ccdee02abd46db8ecd4949597ef218eb908b576a Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Jun 2023 09:58:51 -0700 Subject: [PATCH 2/2] docstring --- pandas/core/arrays/base.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 582e85162b33c..56284647ebb3e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1449,7 +1449,16 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): # these are not guaranteed to be stable across pandas versions. def _values_for_json(self) -> np.ndarray: - # TODO: document! + """ + Specify how to render our entries in to_json. + + Notes + ----- + The dtype on the returned ndarray is not restricted, but for non-native + types that are not specifically handled in objToJSON.c, to_json is + liable to raise. In these cases, it may be safer to return an ndarray + of strings. + """ return np.asarray(self) def _hash_pandas_object(