diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c1d9b2744b27e..a3fd1918f0067 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -619,6 +619,7 @@ Other API changes new DataFrame (shallow copy) instead of the original DataFrame, consistent with other methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) +- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`) - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. 
Previously they had dtype ``int64`` (:issue:`50926`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8410678db4f0d..be84e292b63e7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -538,6 +538,10 @@ def _ensure_array(cls, data, dtype, copy: bool): if data.ndim > 1: # GH#13601, GH#20285, GH#27125 raise ValueError("Index data must be 1-dimensional") + elif dtype == np.float16: + # float16 not supported (no indexing engine) + raise NotImplementedError("float16 indexes are not supported") + if copy: # asarray_tuplesafe does not always copy underlying data, # so need to make sure that this happens diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 533ea56d8a7e3..99565f380f6af 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import Callable - import numpy as np from pandas._typing import Dtype @@ -10,20 +8,7 @@ doc, ) -from pandas.core.dtypes.common import ( - is_dtype_equal, - is_integer_dtype, - is_numeric_dtype, - is_scalar, - pandas_dtype, -) -from pandas.core.dtypes.generic import ABCSeries - -from pandas.core.construction import sanitize_array -from pandas.core.indexes.base import ( - Index, - maybe_extract_name, -) +from pandas.core.indexes.base import Index class NumericIndex(Index): @@ -64,102 +49,20 @@ class NumericIndex(Index): """ _typ = "numericindex" - _values: np.ndarray _default_dtype: np.dtype | None = None - _dtype_validation_metadata: tuple[Callable[..., bool], str] = ( - is_numeric_dtype, - "numeric type", - ) _can_hold_strings = False def __new__( cls, data=None, dtype: Dtype | None = None, copy: bool = False, name=None ) -> NumericIndex: - name = maybe_extract_name(name, data, cls) - - subarr = 
cls._ensure_array(data, dtype, copy) - return cls._simple_new(subarr, name=name) - - @classmethod - def _ensure_array(cls, data, dtype, copy: bool): - """ - Ensure we have a valid array to pass to _simple_new. - """ - cls._validate_dtype(dtype) - if dtype == np.float16: - - # float16 not supported (no indexing engine) - raise NotImplementedError("float16 indexes are not supported") - - if not isinstance(data, (np.ndarray, Index)): - # Coerce to ndarray if not already ndarray or Index - if is_scalar(data): - cls._raise_scalar_data_error(data) - - # other iterable of some kind - if not isinstance(data, (ABCSeries, list, tuple)): - data = list(data) - - if isinstance(data, (list, tuple)): - if len(data): - data = sanitize_array(data, index=None) - else: - data = np.array([], dtype=np.int64) - - dtype = cls._ensure_dtype(dtype) - - if copy or not is_dtype_equal(data.dtype, dtype): - # TODO: the try/except below is because it's difficult to predict the error - # and/or error message from different combinations of data and dtype. - # Efforts to avoid this try/except welcome. 
- # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 - try: - subarr = np.array(data, dtype=dtype, copy=copy) - cls._validate_dtype(subarr.dtype) - except (TypeError, ValueError): - raise ValueError(f"data is not compatible with {cls.__name__}") - cls._assert_safe_casting(data, subarr) - else: - subarr = data - - if subarr.ndim > 1: - # GH#13601, GH#20285, GH#27125 - raise ValueError("Index data must be 1-dimensional") - - subarr = np.asarray(subarr) - if subarr.dtype == "float16": - # float16 not supported (no indexing engine) - raise NotImplementedError("float16 indexes are not implemented") - - return subarr - - @classmethod - def _validate_dtype(cls, dtype: Dtype | None) -> None: - if dtype is None: - return - - validation_func, expected = cls._dtype_validation_metadata - if not validation_func(dtype): - raise ValueError( - f"Incorrect `dtype` passed: expected {expected}, received {dtype}" - ) - - @classmethod - def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: - """ - Assumes dtype has already been validated. - """ - if dtype is None: - return cls._default_dtype - - dtype = pandas_dtype(dtype) - if not isinstance(dtype, np.dtype): - raise TypeError(f"{dtype} not a numpy type") - elif dtype == np.float16: - # float16 not supported (no indexing engine) - raise NotImplementedError("float16 indexes are not supported") - - return dtype + # temporary scaffolding, will be removed soon. 
+ if isinstance(data, list) and len(data) == 0: + data = np.array([], dtype=np.int64) + elif isinstance(data, range): + data = np.arange(data.start, data.stop, data.step, dtype=np.int64) + return super().__new__( + cls, data=data, dtype=dtype, copy=copy, name=name + ) # type: ignore[return-value] # ---------------------------------------------------------------- # Indexing Methods @@ -168,17 +71,3 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: return False - - # ---------------------------------------------------------------- - - @classmethod - def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: - """ - Ensure incoming data can be represented with matching signed-ness. - - Needed if the process of casting data from some accepted dtype to the internal - dtype(s) bears the risk of truncation (e.g. float to int). - """ - if is_integer_dtype(subarr.dtype): - if not np.array_equal(data, subarr): - raise TypeError("Unsafe NumPy casting, you must explicitly cast") diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ff733ab51b85..daa824c301a82 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -98,6 +98,7 @@ class RangeIndex(NumericIndex): _typ = "rangeindex" _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range + _values: np.ndarray @property def _engine_type(self) -> type[libindex.Int64Engine]: @@ -178,6 +179,17 @@ def _simple_new( # type: ignore[override] result._reset_identity() return result + @classmethod + def _validate_dtype(cls, dtype: Dtype | None) -> None: + if dtype is None: + return + + validation_func, expected = cls._dtype_validation_metadata + if not validation_func(dtype): + raise ValueError( + f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + ) + # 
-------------------------------------------------------------------- # error: Return type "Type[NumericIndex]" of "_constructor" incompatible with return diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 6184b3288f886..e3f933a35efe7 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -39,8 +39,8 @@ class ConstructorTests: params=[ ([3, 14, 15, 92, 653], np.int64), (np.arange(10, dtype="int64"), np.int64), - (NumericIndex(range(-10, 11), dtype=np.int64), np.int64), - (NumericIndex(range(10, 31), dtype=np.uint64), np.uint64), + (NumericIndex(np.arange(-10, 11, dtype=np.int64)), np.int64), + (NumericIndex(np.arange(10, 31, dtype=np.uint64)), np.uint64), (NumericIndex(np.arange(20, 30, 0.5), dtype=np.float64), np.float64), (date_range("20180101", periods=10), "