Skip to content
15 changes: 10 additions & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7157,17 +7157,22 @@ def maybe_sequence_to_range(sequence) -> Any | range:
-------
Any : input or range
"""
if isinstance(sequence, (ABCSeries, Index, range, ExtensionArray)):
if isinstance(sequence, (range, ExtensionArray)):
return sequence
elif len(sequence) == 1 or lib.infer_dtype(sequence, skipna=False) != "integer":
return sequence
elif len(sequence) == 0:
elif isinstance(sequence, (ABCSeries, Index)) and not (
isinstance(sequence.dtype, np.dtype) and sequence.dtype.kind == "i"
):
return sequence
if len(sequence) == 0:
return range(0)
diff = sequence[1] - sequence[0]
np_sequence = np.asarray(sequence, dtype=np.int64)
diff = np_sequence[1] - np_sequence[0]
if diff == 0:
return sequence
elif len(sequence) == 2 or lib.is_sequence_range(np.asarray(sequence), diff):
return range(sequence[0], sequence[-1] + diff, diff)
elif len(sequence) == 2 or lib.is_sequence_range(np_sequence, diff):
return range(np_sequence[0], np_sequence[-1] + diff, diff)
else:
return sequence

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_set_index_dst(self):

def test_set_index(self, float_string_frame):
df = float_string_frame
idx = Index(np.arange(len(df))[::-1])
idx = Index(np.arange(len(df) - 1, -1, -1, dtype=np.int64))

df = df.set_index(idx)
tm.assert_index_equal(df.index, idx)
Expand Down
22 changes: 6 additions & 16 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,6 @@ def test_read_write_reread_dta14(self, file, parsed_114, version, datapath):
written_and_read_again = self.read_dta(path)

expected = parsed_114.copy()
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -576,7 +575,6 @@ def test_numeric_column_names(self):
written_and_read_again.columns = map(convert_col_name, columns)

expected = original
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(expected, written_and_read_again)

@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
Expand All @@ -594,7 +592,6 @@ def test_nan_to_missing_value(self, version):

written_and_read_again = written_and_read_again.set_index("index")
expected = original
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(written_and_read_again, expected)

def test_no_index(self):
Expand All @@ -617,7 +614,6 @@ def test_string_no_dates(self):
written_and_read_again = self.read_dta(path)

expected = original
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)

def test_large_value_conversion(self):
Expand All @@ -637,7 +633,6 @@ def test_large_value_conversion(self):
modified["s1"] = Series(modified["s1"], dtype=np.int16)
modified["s2"] = Series(modified["s2"], dtype=np.int32)
modified["s3"] = Series(modified["s3"], dtype=np.float64)
modified.index = original.index.astype(np.int32)
tm.assert_frame_equal(written_and_read_again.set_index("index"), modified)

def test_dates_invalid_column(self):
Expand Down Expand Up @@ -713,7 +708,7 @@ def test_write_missing_strings(self):

expected = DataFrame(
[["1"], [""]],
index=pd.Index([0, 1], dtype=np.int32, name="index"),
index=pd.RangeIndex(2, name="index"),
columns=["foo"],
)

Expand Down Expand Up @@ -746,7 +741,6 @@ def test_bool_uint(self, byteorder, version):
written_and_read_again = written_and_read_again.set_index("index")

expected = original
expected.index = expected.index.astype(np.int32)
expected_types = (
np.int8,
np.int8,
Expand Down Expand Up @@ -1030,7 +1024,7 @@ def test_categorical_writing(self, version):
res = written_and_read_again.set_index("index")

expected = original
expected.index = expected.index.set_names("index").astype(np.int32)
expected.index = expected.index.set_names("index")

expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
expected["unlabeled"] = expected["unlabeled"].apply(str)
Expand Down Expand Up @@ -1094,7 +1088,6 @@ def test_categorical_with_stata_missing_values(self, version):
new_cats = cat.remove_unused_categories().categories
cat = cat.set_categories(new_cats, ordered=True)
expected[col] = cat
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(res, expected)

@pytest.mark.parametrize("file", ["stata10_115", "stata10_117"])
Expand Down Expand Up @@ -1544,7 +1537,6 @@ def test_out_of_range_float(self):

original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64)
expected = original
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(reread.set_index("index"), expected)

@pytest.mark.parametrize("infval", [np.inf, -np.inf])
Expand Down Expand Up @@ -1669,7 +1661,6 @@ def test_writer_117(self):
original["int32"] = original["int32"].astype(np.int32)
original["float32"] = Series(original["float32"], dtype=np.float32)
original.index.name = "index"
original.index = original.index.astype(np.int32)
copy = original.copy()
with tm.ensure_clean() as path:
original.to_stata(
Expand Down Expand Up @@ -1962,7 +1953,7 @@ def test_read_write_ea_dtypes(self, dtype_backend):
# stata stores with ms unit, so unit does not round-trip exactly
"e": pd.date_range("2020-12-31", periods=3, freq="D", unit="ms"),
},
index=pd.Index([0, 1, 2], name="index", dtype=np.int32),
index=pd.RangeIndex(range(3), name="index"),
)

tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
Expand Down Expand Up @@ -2049,7 +2040,6 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten
reread = read_stata(fp, index_col="index")

expected = df
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(reread, expected)


Expand All @@ -2075,7 +2065,6 @@ def test_compression_dict(method, file_ext):
reread = read_stata(fp, index_col="index")

expected = df
expected.index = expected.index.astype(np.int32)
tm.assert_frame_equal(reread, expected)


Expand All @@ -2085,7 +2074,6 @@ def test_chunked_categorical(version):
df.index.name = "index"

expected = df.copy()
expected.index = expected.index.astype(np.int32)

with tm.ensure_clean() as path:
df.to_stata(path, version=version)
Expand All @@ -2094,7 +2082,9 @@ def test_chunked_categorical(version):
block = block.set_index("index")
assert "cats" in block
tm.assert_series_equal(
block.cats, expected.cats.iloc[2 * i : 2 * (i + 1)]
block.cats,
expected.cats.iloc[2 * i : 2 * (i + 1)],
check_index_type=len(block) > 1,
)


Expand Down
19 changes: 12 additions & 7 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2192,23 +2192,28 @@ def test_merge_on_indexes(self, how, sort, expected):

@pytest.mark.parametrize(
"index",
[Index([1, 2], dtype=dtyp, name="index_col") for dtyp in tm.ALL_REAL_NUMPY_DTYPES]
[
Index([1, 2, 4], dtype=dtyp, name="index_col")
for dtyp in tm.ALL_REAL_NUMPY_DTYPES
]
+ [
CategoricalIndex(["A", "B"], categories=["A", "B"], name="index_col"),
RangeIndex(start=0, stop=2, name="index_col"),
DatetimeIndex(["2018-01-01", "2018-01-02"], name="index_col"),
CategoricalIndex(["A", "B", "C"], categories=["A", "B", "C"], name="index_col"),
RangeIndex(start=0, stop=3, name="index_col"),
DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"], name="index_col"),
],
ids=lambda x: f"{type(x).__name__}[{x.dtype}]",
)
def test_merge_index_types(index):
# gh-20777
# assert key access is consistent across index types
left = DataFrame({"left_data": [1, 2]}, index=index)
right = DataFrame({"right_data": [1.0, 2.0]}, index=index)
left = DataFrame({"left_data": [1, 2, 3]}, index=index)
right = DataFrame({"right_data": [1.0, 2.0, 3.0]}, index=index)

result = left.merge(right, on=["index_col"])

expected = DataFrame({"left_data": [1, 2], "right_data": [1.0, 2.0]}, index=index)
expected = DataFrame(
{"left_data": [1, 2, 3], "right_data": [1.0, 2.0, 3.0]}, index=index
)
tm.assert_frame_equal(result, expected)


Expand Down