From 06e1e346e2154dddecbca311044772075ef54b4c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Nov 2022 19:32:13 -0800 Subject: [PATCH 1/6] one less with_infer --- pandas/_testing/__init__.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 02ee13d60427e..43020ae471f10 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -274,7 +274,7 @@ def box_expected(expected, box_cls, transpose: bool = True): else: expected = pd.array(expected, copy=False) elif box_cls is Index: - expected = Index._with_infer(expected) + expected = Index(expected) elif box_cls is Series: expected = Series(expected) elif box_cls is DataFrame: diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 529dd6baa70c0..d0d1a46893483 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1162,6 +1162,9 @@ def test_numarr_with_dtype_add_int(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) + if box is Index and dtype is object: + # TODO: avoid this; match behavior with Series + expected = expected.astype(np.int64) result = 1 + ser tm.assert_equal(result, expected) From 677826dc753ae99891dcf7eda5089a1d1a648605 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 1 Dec 2022 09:01:52 -0800 Subject: [PATCH 2/6] _with_infer less --- pandas/core/strings/accessor.py | 2 +- pandas/core/util/hashing.py | 4 +--- pandas/tests/arithmetic/test_numeric.py | 3 +++ pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/extension/base/groupby.py | 4 ++-- pandas/tests/extension/test_string.py | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 71a50c69bfee1..8cd4cb976503d 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -319,7 +319,7 @@ def cons_row(x): out = out.get_level_values(0) return out else: - return Index._with_infer(result, name=name) + return Index(result, name=name) else: index = self._orig.index # This is a mess. diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 5a5e46e0227aa..e0b18047aa0ec 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -344,9 +344,7 @@ def _hash_ndarray( ) codes, categories = factorize(vals, sort=False) - cat = Categorical( - codes, Index._with_infer(categories), ordered=False, fastpath=True - ) + cat = Categorical(codes, Index(categories), ordered=False, fastpath=True) return _hash_categorical(cat, encoding, hash_key) try: diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d0d1a46893483..f2af85c2e388d 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1147,6 +1147,9 @@ def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) + if box is Index and dtype is object: + # TODO: avoid this; match behavior with Series + expected = expected.astype(np.float64) result = np.nan + ser tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1566476c32989..f34953876f5f4 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -89,7 +89,7 @@ def test_astype_index(all_data, dropna): other = all_data dtype = all_data.dtype - idx = pd.Index._with_infer(np.array(other)) + idx = pd.Index(np.array(other)) assert isinstance(idx, ABCIndex) result = idx.astype(dtype) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 1f46442ee13b0..339c6560d6212 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -33,7 +33,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): _, uniques = pd.factorize(data_for_grouping, sort=True) if as_index: - index = pd.Index._with_infer(uniques, name="B") + index = pd.Index(uniques, name="B") expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) else: @@ -61,7 +61,7 @@ def test_groupby_extension_no_sort(self, data_for_grouping): result = df.groupby("B", sort=False).A.mean() _, index = pd.factorize(data_for_grouping, sort=False) - index = pd.Index._with_infer(index, name="B") + index = pd.Index(index, name="B") expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index ecc69113882c5..de7967a8578b5 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -391,7 +391,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): _, uniques = pd.factorize(data_for_grouping, sort=True) if as_index: - index = pd.Index._with_infer(uniques, name="B") + index = pd.Index(uniques, name="B") expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) else: From bb8e49d61d4f950459f0b24e09342a17870ca73a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 1 Dec 2022 13:44:55 -0800 Subject: [PATCH 3/6] less _with_infer --- pandas/core/indexes/base.py | 10 ++++++---- pandas/core/indexes/multi.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 01a1ebd459616..065ee8f86ca6a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4233,7 +4233,7 @@ def _reindex_non_unique( if isinstance(self, ABCMultiIndex): new_index = type(self).from_tuples(new_labels, names=self.names) else: - new_index = Index._with_infer(new_labels, name=self.name) + new_index = Index(new_labels, name=self.name) return new_index, indexer, new_indexer # -------------------------------------------------------------------- @@ -6477,7 +6477,7 @@ def insert(self, loc: int, item) -> Index: if self._typ == "numericindex": # Use self._constructor instead of Index to retain NumericIndex GH#43921 # TODO(2.0) can use Index instead of self._constructor - return self._constructor._with_infer(new_values, name=self.name) + return self._constructor(new_values, name=self.name) else: return Index._with_infer(new_values, name=self.name) @@ -6850,7 +6850,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: if len(sequences) == 1: if names is not None: names = names[0] - return Index._with_infer(sequences[0], name=names) + return Index(sequences[0], name=names) else: return MultiIndex.from_arrays(sequences, names=names) @@ -6894,6 +6894,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: if isinstance(index_like, ABCSeries): name = index_like.name return Index._with_infer(index_like, name=name, copy=copy) + # _with_infer needed for test_value_counts_normalized if is_iterator(index_like): index_like = list(index_like) @@ -6909,8 +6910,9 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: return MultiIndex.from_arrays(index_like) else: - return Index._with_infer(index_like, copy=copy, tupleize_cols=False) + return Index(index_like, copy=copy, tupleize_cols=False) else: + # with_infer needed for stata tests return Index._with_infer(index_like, copy=copy) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f0b0ec23dba1a..012a92793acf9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2112,7 +2112,7 @@ def append(self, other): # setting names to None automatically return MultiIndex.from_tuples(new_tuples) except (TypeError, IndexError): - return Index._with_infer(new_tuples) + return Index(new_tuples) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: if len(args) == 0 and len(kwargs) == 0: From 0fb488dcf864f5a28154ec5e626c7ebb85df0e20 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 1 Dec 2022 14:55:04 -0800 Subject: [PATCH 4/6] less _with_infer --- pandas/core/algorithms.py | 2 +- pandas/core/indexes/base.py | 6 +++--- pandas/tests/io/test_stata.py | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 65691e6f46eb5..31d635423b020 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -871,7 +871,7 @@ def value_counts( # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. - idx = Index._with_infer(keys) + idx = Index(keys) if idx.dtype == bool and keys.dtype == object: idx = idx.astype(object) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 065ee8f86ca6a..4d8b0ee7b9b4c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2678,6 +2678,7 @@ def fillna(self, value=None, downcast=None): if downcast is None: # no need to care metadata other than name # because it can't have freq if it has NaTs + # _with_infer needed for test_fillna_categorical return Index._with_infer(result, name=self.name) raise NotImplementedError( f"{type(self).__name__}.fillna does not support 'downcast' " @@ -6893,8 +6894,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: if isinstance(index_like, ABCSeries): name = index_like.name - return Index._with_infer(index_like, name=name, copy=copy) - # _with_infer needed for test_value_counts_normalized + return Index(index_like, name=name, copy=copy) if is_iterator(index_like): index_like = list(index_like) @@ -6913,7 +6913,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: return Index(index_like, copy=copy, tupleize_cols=False) else: # with_infer needed for stata tests - return Index._with_infer(index_like, copy=copy) + return Index(index_like, copy=copy) def ensure_has_len(seq): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 535c2d3e7e0f3..530934df72606 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -20,7 +20,6 @@ DataFrame, Series, ) -from pandas.core.indexes.api import ensure_index from pandas.tests.io.test_compression import _compression_to_extension from pandas.io.parsers import read_csv @@ -1144,7 +1143,7 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: if is_categorical_dtype(ser.dtype): cat = ser._values.remove_unused_categories() if cat.categories.dtype == object: - categories = ensure_index(cat.categories._values) + categories = pd.Index._with_infer(cat.categories._values) cat = cat.set_categories(categories) from_frame[col] = cat return from_frame From cb32aec89a36f107d0054f7831fec0b5485e6d8d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Dec 2022 07:33:53 -0800 Subject: [PATCH 5/6] mypy fixup --- pandas/core/indexes/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4d8b0ee7b9b4c..20e0c2851fb36 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4231,10 +4231,10 @@ def _reindex_non_unique( new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) new_indexer[~check] = -1 - if isinstance(self, ABCMultiIndex): - new_index = type(self).from_tuples(new_labels, names=self.names) - else: + if not isinstance(self, ABCMultiIndex): new_index = Index(new_labels, name=self.name) + else: + new_index = type(self).from_tuples(new_labels, names=self.names) return new_index, indexer, new_indexer # -------------------------------------------------------------------- From 1f549749801e75d8cfb25764b736060c696c9d25 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Dec 2022 11:16:11 -0800 Subject: [PATCH 6/6] removecomment --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 20e0c2851fb36..0b55416d2bd7e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6912,7 +6912,6 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: else: return Index(index_like, copy=copy, tupleize_cols=False) else: - # with_infer needed for stata tests return Index(index_like, copy=copy)