diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0b2b526dfe9e7..5698bbb5cfab9 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -219,17 +219,21 @@ Previously, these would be cast to a NumPy array with object dtype. In general, this should result in better performance when storing an array of intervals or periods in a :class:`Series` or column of a :class:`DataFrame`. -Note that the ``.values`` of a ``Series`` containing one of these types is no longer a NumPy -array, but rather an ``ExtensionArray``: +Use :attr:`Series.array` to extract the underlying array of intervals or periods +from the ``Series``: .. ipython:: python - ser.values - pser.values + ser.array + pser.array -This is the same behavior as ``Series.values`` for categorical data. See -:ref:`whatsnew_0240.api_breaking.interval_values` for more. +.. warning:: + For backwards compatibility, :attr:`Series.values` continues to return + a NumPy array of objects for Interval and Period data. We recommend + using :attr:`Series.array` when you need the array of data stored in the + ``Series``, and :meth:`Series.to_numpy` when you know you need a NumPy array. + See :ref:`basics.dtypes` and :ref:`dsintro.attrs` for more. .. _whatsnew_0240.enhancements.styler_pipe: @@ -505,44 +509,6 @@ New Behavior on Windows: ...: print(f.read()) b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' -.. _whatsnew_0240.api_breaking.interval_values: - -``IntervalIndex.values`` is now an ``IntervalArray`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :attr:`~Interval.values` attribute of an :class:`IntervalIndex` now returns an -``IntervalArray``, rather than a NumPy array of :class:`Interval` objects (:issue:`19453`). - -Previous Behavior: - -.. 
code-block:: ipython - - In [1]: idx = pd.interval_range(0, 4) - - In [2]: idx.values - Out[2]: - array([Interval(0, 1, closed='right'), Interval(1, 2, closed='right'), - Interval(2, 3, closed='right'), Interval(3, 4, closed='right')], - dtype=object) - -New Behavior: - -.. ipython:: python - - idx = pd.interval_range(0, 4) - idx.values - -This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. - -For situations where you need an ``ndarray`` of ``Interval`` objects, use -:meth:`numpy.asarray`. - -.. ipython:: python - - np.asarray(idx) - idx.values.astype(object) - - .. _whatsnew_0240.api.timezone_offset_parsing: Parsing Datetime Strings with Timezone Offsets diff --git a/pandas/core/base.py b/pandas/core/base.py index e224b6a50d332..1d2a0a2544dbc 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -913,7 +913,7 @@ def _ndarray_values(self): - categorical -> codes """ if is_extension_array_dtype(self): - return self.values._ndarray_values + return self.array._ndarray_values return self.values @property @@ -1307,12 +1307,12 @@ def memory_usage(self, deep=False): Memory usage does not include memory consumed by elements that are not components of the array if deep=False or if used on PyPy """ - if hasattr(self.values, 'memory_usage'): - return self.values.memory_usage(deep=deep) + if hasattr(self.array, 'memory_usage'): + return self.array.memory_usage(deep=deep) - v = self.values.nbytes + v = self.array.nbytes if deep and is_object_dtype(self) and not PYPY: - v += lib.memory_usage_of_objects(self.values) + v += lib.memory_usage_of_objects(self.array) return v @Substitution( diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c30e64fcf04da..ee5f0820a7b3e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -326,6 +326,15 @@ def nbytes(self): # for TZ-aware return self._ndarray_values.nbytes + def memory_usage(self, deep=False): + # TODO: Remove this when we have a 
DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + result = self._ndarray_values.nbytes + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + @cache_readonly def _is_dates_only(self): """Return a boolean if we are only dates (and don't have a timezone)""" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 51c47a81f8e2f..d37da14ab5d2c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -21,9 +21,9 @@ _NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_extension_type, - is_float_dtype, is_integer, is_integer_dtype, is_list_like, - is_numeric_v_string_like, is_object_dtype, is_re, is_re_compilable, - is_sparse, is_timedelta64_dtype, pandas_dtype) + is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, + is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype, + is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PandasExtensionDtype) @@ -1996,6 +1996,18 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): return blocks, mask +class ObjectValuesExtensionBlock(ExtensionBlock): + """ + Block providing backwards-compatibility for `.values`. + + Used by PeriodArray and IntervalArray to ensure that + Series[T].values is an ndarray of objects. 
+ """ + + def external_values(self, dtype=None): + return self.values.astype(object) + + class NumericBlock(Block): __slots__ = () is_numeric = True @@ -3017,6 +3029,8 @@ def get_block_type(values, dtype=None): if is_categorical(values): cls = CategoricalBlock + elif is_interval_dtype(dtype) or is_period_dtype(dtype): + cls = ObjectValuesExtensionBlock elif is_extension_array_dtype(values): cls = ExtensionBlock elif issubclass(vtype, np.floating): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5f9860ce98b11..f1372a1fe2f51 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -30,8 +30,9 @@ from pandas.io.formats.printing import pprint_thing from .blocks import ( - Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock, _extend_blocks, - _merge_blocks, _safe_reshape, get_block_type, make_block) + Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock, + ObjectValuesExtensionBlock, _extend_blocks, _merge_blocks, _safe_reshape, + get_block_type, make_block) from .concat import ( # all for concatenate_block_managers combine_concat_plans, concatenate_join_units, get_mgr_concatenation_plan, is_uniform_join_units) @@ -1752,6 +1753,14 @@ def form_blocks(arrays, names, axes): blocks.extend(external_blocks) + if len(items_dict['ObjectValuesExtensionBlock']): + external_blocks = [ + make_block(array, klass=ObjectValuesExtensionBlock, placement=[i]) + for i, _, array in items_dict['ObjectValuesExtensionBlock'] + ] + + blocks.extend(external_blocks) + if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index ff4f9b7847019..2bd7e2c0b9b82 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -433,7 +433,7 @@ def _unstack_extension_series(series, level, fill_value): level=level, fill_value=-1).get_result() out = [] - values = series.values + values = series.array for 
col, indices in result.iteritems(): out.append(Series(values.take(indices.values, diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 9904fcd362818..42e481d974295 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -231,7 +231,7 @@ def test_unstack(self, data, index, obj): for level in combinations: result = ser.unstack(level=level) - assert all(isinstance(result[col].values, type(data)) + assert all(isinstance(result[col].array, type(data)) for col in result.columns) expected = ser.astype(object).unstack(level=level) result = result.astype(object) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 79b1bc10b9f4b..2bc009c5a2fc8 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -492,3 +492,13 @@ def test_is_homogeneous_type(self): assert Series()._is_homogeneous_type assert Series([1, 2])._is_homogeneous_type assert Series(pd.Categorical([1, 2]))._is_homogeneous_type + + @pytest.mark.parametrize("data", [ + pd.period_range("2000", periods=4), + pd.IntervalIndex.from_breaks([1, 2, 3, 4]) + ]) + def test_values_compatibility(self, data): + # https://github.com/pandas-dev/pandas/issues/23995 + result = pd.Series(data).values + expected = np.array(data.astype(object)) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7a1828149cd87..faed4ccebd96b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1340,11 +1340,11 @@ def assert_series_equal(left, right, check_dtype=True, assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) elif is_interval_dtype(left) or is_interval_dtype(right): - assert_interval_array_equal(left.values, right.values) + assert_interval_array_equal(left.array, right.array) elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and 
is_extension_array_dtype(right) and not is_categorical_dtype(right)): - return assert_extension_array_equal(left.values, right.values) + return assert_extension_array_equal(left.array, right.array) else: _testing.assert_almost_equal(left.get_values(), right.get_values(),