From 0e832111187ee3ef8742e744a46984e5ce148bdf Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 Nov 2022 13:20:55 +0000 Subject: [PATCH 1/3] API: NumericIndex([1, 2, 3]).dtype should be int64 on 32-bit systems --- pandas/core/indexes/base.py | 8 ------ pandas/core/indexes/numeric.py | 13 ++++++--- pandas/tests/indexes/numeric/test_numeric.py | 29 ++++++++++---------- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c24c94a786068..0bc568fb122ed 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4962,14 +4962,6 @@ def _raise_scalar_data_error(cls, data): f"kind, {repr(data)} was passed" ) - @final - @classmethod - def _string_data_error(cls, data): - raise TypeError( - "String dtype not supported, you may need " - "to explicitly cast to a numeric type" - ) - def _validate_fill_value(self, value): """ Check if the value can be inserted into our array without casting, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 816a1752c5bf0..6af8011ae40f4 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -32,6 +32,7 @@ ) from pandas.core.dtypes.generic import ABCSeries +from pandas.core.construction import sanitize_array from pandas.core.indexes.base import ( Index, maybe_extract_name, @@ -144,15 +145,19 @@ def _ensure_array(cls, data, dtype, copy: bool): data = list(data) orig = data - data = np.asarray(data, dtype=dtype) + if isinstance(data, (list, tuple)) and dtype is None: + if len(data): + data = sanitize_array(data, index=None) + else: + data = np.array([], dtype=np.int64) + else: + data = np.asarray(data, dtype=dtype) + if dtype is None and data.dtype.kind == "f": if cls is UInt64Index and (data >= 0).all(): # https://github.com/numpy/numpy/issues/19146 data = np.asarray(orig, dtype=np.uint64) - if issubclass(data.dtype.type, str): - cls._string_data_error(data) - dtype = cls._ensure_dtype(dtype) if
copy or not is_dtype_equal(data.dtype, dtype): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 8901ef7cb3e33..c06fce4811f12 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -64,6 +64,10 @@ def check_coerce(self, a, b, is_float_index=True): else: self.check_is_index(b) + def test_constructor_from_list_no_dtype(self): + index = self._index_cls([1.5, 2.5, 3.5]) + assert index.dtype == np.float64 + def test_constructor(self, dtype): index_cls = self._index_cls @@ -115,17 +119,10 @@ def test_constructor_invalid(self): with pytest.raises(TypeError, match=msg): index_cls(0.0) - # 2021-02-1 we get ValueError in numpy 1.20, but not on all builds - msg = "|".join( - [ - "String dtype not supported, you may need to explicitly cast ", - "could not convert string to float: 'a'", - ] - ) - with pytest.raises((TypeError, ValueError), match=msg): + msg = f"data is not compatible with {index_cls.__name__}" + with pytest.raises(ValueError, match=msg): index_cls(["a", "b", 0.0]) - msg = f"data is not compatible with {index_cls.__name__}" with pytest.raises(ValueError, match=msg): index_cls([Timestamp("20130101")]) @@ -327,18 +324,16 @@ def test_identical(self, simple_index, dtype): assert not index.astype(dtype=object).identical(index.astype(dtype=dtype)) def test_cant_or_shouldnt_cast(self): - msg = ( - "String dtype not supported, " - "you may need to explicitly cast to a numeric type" - ) + msg = f"data is not compatible with {self._index_cls.__name__}" + # can't data = ["foo", "bar", "baz"] - with pytest.raises(TypeError, match=msg): + with pytest.raises(ValueError, match=msg): self._index_cls(data) # shouldn't data = ["0", "1", "2"] - with pytest.raises(TypeError, match=msg): + with pytest.raises(ValueError, match=msg): self._index_cls(data) def test_view_index(self, simple_index): @@ -372,6 +367,10 @@ def simple_index(self, dtype): def index(self, 
request, dtype): return self._index_cls(request.param, dtype=dtype) + def test_constructor_from_list_no_dtype(self): + index = self._index_cls([1, 2, 3]) + assert index.dtype == np.int64 + def test_constructor(self, dtype): index_cls = self._index_cls From caf0c5a207defb14819cce4bd0218444e2e3250c Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 Nov 2022 18:56:14 +0000 Subject: [PATCH 2/3] cleanups --- pandas/core/construction.py | 2 +- pandas/core/indexes/numeric.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8aa41e939a809..bb82fc0192626 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -618,7 +618,7 @@ def sanitize_array( else: _sanitize_non_ordered(data) # materialize e.g. generators, convert e.g. tuples, abc.ValueView - data = list(data) + data = list(data) if not isinstance(data, list) else data if len(data) == 0 and dtype is None: # We default to float64, matching numpy diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 6af8011ae40f4..328e3d2f401e6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -145,13 +145,11 @@ def _ensure_array(cls, data, dtype, copy: bool): data = list(data) orig = data - if isinstance(data, (list, tuple)) and dtype is None: + if isinstance(data, (list, tuple)): if len(data): data = sanitize_array(data, index=None) else: data = np.array([], dtype=np.int64) - else: - data = np.asarray(data, dtype=dtype) if dtype is None and data.dtype.kind == "f": if cls is UInt64Index and (data >= 0).all(): @@ -203,7 +201,8 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: return cls._default_dtype dtype = pandas_dtype(dtype) - assert isinstance(dtype, np.dtype) + if not isinstance(dtype, np.dtype): + raise TypeError(f"{dtype} not a numpy type") if cls._is_backward_compat_public_numeric_index: # dtype for NumericIndex From 
0e7b5325a14ce26df6a9a3f76d4d39e87396d2a9 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 Nov 2022 19:02:44 +0000 Subject: [PATCH 3/3] cleanups II --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index bb82fc0192626..8aa41e939a809 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -618,7 +618,7 @@ def sanitize_array( else: _sanitize_non_ordered(data) # materialize e.g. generators, convert e.g. tuples, abc.ValueView - data = list(data) if not isinstance(data, list) else data + data = list(data) if len(data) == 0 and dtype is None: # We default to float64, matching numpy