Skip to content

Commit 7b84f5d

Browse files
committed
ENH: Generalization of RangeIndex to support floats. (pandas-dev#46484)
1 parent 61464f8 commit 7b84f5d

File tree

4 files changed

+159
-56
lines changed

4 files changed

+159
-56
lines changed

pandas/core/dtypes/common.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -112,33 +112,34 @@ def ensure_str(value: bytes | Any) -> str:
112112
return value
113113

114114

115-
def ensure_python_intfloat(
    value: int | float | np.integer | np.inexact, coerce_float: bool
) -> int | float:
    """
    Ensure that a value is a python int or float.

    Parameters
    ----------
    value: int or float or numpy.integer or numpy.floating
        Scalar to convert.
    coerce_float: bool
        If True the value is converted to a python float, otherwise to a
        python int.

    Returns
    -------
    int or float
        ``float(value)`` when ``coerce_float`` is True, else ``int(value)``.

    Raises
    ------
    TypeError: if the value isn't an int/float or can't be converted to the
        requested type without changing its value (e.g. ``1.5`` to int,
        or NaN/infinity to int).
    """
    if not (is_integer(value) or is_float(value)):
        if not is_scalar(value):
            raise TypeError(
                f"Value needs to be a scalar value, was type {type(value).__name__}"
            )
        raise TypeError(f"Wrong type {type(value)} for value {value}")

    # Validate explicitly instead of using ``assert``: asserts are stripped
    # under ``python -O`` and would surface as AssertionError rather than the
    # documented TypeError that callers catch.
    try:
        new_value = float(value) if coerce_float else int(value)
        # Floats are compared with a tolerance, ints must round-trip exactly.
        unchanged = (
            bool(np.isclose(value, new_value))
            if coerce_float
            else bool(value == new_value)
        )
    except (TypeError, ValueError, OverflowError) as err:
        # e.g. int(nan) -> ValueError, int(inf) / float(10**400) -> OverflowError
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err
    if not unchanged:
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    return new_value
143144

144145

@@ -1822,7 +1823,7 @@ def is_all_strings(value: ArrayLike) -> bool:
18221823
"DT64NS_DTYPE",
18231824
"ensure_float",
18241825
"ensure_float64",
1825-
"ensure_python_int",
1826+
"ensure_python_intfloat",
18261827
"ensure_str",
18271828
"get_dtype",
18281829
"infer_dtype_from_object",

pandas/core/indexes/float_range.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
5+
6+
class float_range:
    """
    Modified Python built-in function range that accepts floats as arguments.

    The range represents the values ``start + i * step`` for
    ``i = 0, 1, ..., len - 1``; ``stop`` is exclusive. Comparisons against
    the grid of values use a small tolerance (a fraction of one step) so that
    ordinary floating point rounding does not add or drop elements.

    Parameters
    ----------
    start : float (default: 0.0)
        If "stop" is not given, interpreted as "stop" instead.
    stop : float, optional
        Exclusive end of the range. When omitted, ``start`` is used as the
        stop and the range starts at 0.0.
    step : float (default: 1.0)
        Spacing between values; may be negative but not zero.

    Attributes
    ----------
    start
    stop
    step
    current
        Cursor used by ``__next__``; initialised one step before ``start``.

    Raises
    ------
    ValueError
        If ``step`` is zero.
    """

    # Tolerance, expressed as a fraction of one step, within which a value is
    # considered to lie exactly on the grid ``start + i * step``.
    _TOL = 1e-5

    def __init__(self, start=0.0, stop=None, step=1.0):
        # ``stop`` defaults to None (not 0.0) so the single-argument form
        # ``float_range(5.0)`` is detectable and means "0.0 up to 5.0".
        if stop is None:
            stop, start = start, 0.0
        if step == 0:
            raise ValueError("float_range() arg 3 must not be zero")
        self.start = start
        self.current = start - step
        self.stop = stop
        self.step = step

    def __repr__(self):
        return f"float_range({self.start!r}, {self.stop!r}, {self.step!r})"

    def _grid_index(self, value):
        """
        Return the integer ``i`` such that ``start + i * step`` equals
        ``value`` within tolerance, or None if ``value`` is off the grid.

        The returned index is not bounds-checked against the range length.
        """
        ratio = (value - self.start) / self.step
        if not np.isfinite(ratio):
            return None
        nearest = int(round(float(ratio)))
        if abs(ratio - nearest) <= self._TOL:
            return nearest
        return None

    def __contains__(self, key):
        # Working in units of ``step`` handles ascending and descending
        # ranges alike and keeps the tolerance independent of magnitude.
        position = self._grid_index(key)
        return position is not None and 0 <= position < self.length

    def __eq__(self, other):
        if not isinstance(other, float_range):
            return NotImplemented
        return bool(
            np.isclose(self.start, other.start)
            and np.isclose(self.stop, other.stop)
            and np.isclose(self.step, other.step)
        )

    def __getitem__(self, key):
        # Allows for reversing; only the ``[::-1]`` slice is supported.
        if isinstance(key, slice):
            if key.start is None and key.stop is None and key.step == -1:
                # Start from the actual last element, which differs from
                # ``stop - step`` whenever the span is not a multiple of step.
                last = self.start + (self.length - 1) * self.step
                return float_range(
                    start=last,
                    stop=self.start - self.step,
                    step=-self.step,
                )
            raise IndexError
        raise TypeError

    def __iter__(self):
        # Yield from a fresh generator so the range can be iterated more than
        # once; values are computed from the index to avoid accumulating
        # rounding error through repeated addition.
        first, delta = self.start, self.step
        return (first + i * delta for i in range(self.length))

    def __len__(self):
        return self.length

    def __next__(self):
        # Stateful cursor kept for callers that use ``next(obj)`` directly.
        upcoming = self.current + self.step
        position = int(round(float((upcoming - self.start) / self.step)))
        if position >= self.length:
            raise StopIteration
        # Snap to the grid so the cursor does not drift.
        self.current = self.start + position * self.step
        return self.current

    def index(self, key):
        """Return the position of ``key``; raise ValueError if absent."""
        position = self._grid_index(key)
        if position is None or not 0 <= position < self.length:
            raise ValueError(f"{key!r} is not in range")
        return position

    @property
    def length(self):
        """Number of elements in the range (0 for an empty range)."""
        span = (self.stop - self.start) / self.step
        if span <= 0:
            # stop lies at or "behind" start for this step direction
            return 0
        whole_steps = self._grid_index(self.stop)
        if whole_steps is not None:
            # stop sits on the grid and is exclusive
            return whole_steps
        return int(np.ceil(span))

pandas/core/indexes/range.py

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535

3636
from pandas.core.dtypes.common import (
3737
ensure_platform_int,
38-
ensure_python_int,
38+
ensure_python_intfloat,
3939
is_float,
4040
is_integer,
41+
is_numeric_dtype,
4142
is_scalar,
42-
is_signed_integer_dtype,
4343
is_timedelta64_dtype,
4444
)
4545
from pandas.core.dtypes.generic import ABCTimedeltaIndex
@@ -50,6 +50,7 @@
5050
from pandas.core.construction import extract_array
5151
import pandas.core.indexes.base as ibase
5252
from pandas.core.indexes.base import maybe_extract_name
53+
from pandas.core.indexes.float_range import float_range
5354
from pandas.core.indexes.numeric import (
5455
Float64Index,
5556
Int64Index,
@@ -67,20 +68,20 @@ class RangeIndex(NumericIndex):
6768
"""
6869
Immutable Index implementing a monotonic integer range.
6970
70-
RangeIndex is a memory-saving special case of Int64Index limited to
71-
representing monotonic ranges. Using RangeIndex may in some instances
72-
improve computing speed.
71+
RangeIndex is a memory-saving special case of NumericIndex
72+
limited to representing monotonic ranges. Using RangeIndex may in some
73+
instances improve computing speed.
7374
74-
This is the default index type used
75-
by DataFrame and Series when no explicit index is provided by the user.
75+
This is the default index type used by DataFrame and Series when no
76+
explicit index is provided by the user.
7677
7778
Parameters
7879
----------
79-
start : int (default: 0), range, or other RangeIndex instance
80-
If int and "stop" is not given, interpreted as "stop" instead.
81-
stop : int (default: 0)
82-
step : int (default: 1)
83-
dtype : np.int64
80+
start : int|float (default: 0), range, or other RangeIndex instance
81+
If int|float and "stop" is not given, interpreted as "stop" instead.
82+
stop : int|float (default: 0)
83+
step : int|float (default: 1)
84+
dtype : np.int64|np.float64
8485
Unused, accepted for homogeneity with other index types.
8586
copy : bool, default False
8687
Unused, accepted for homogeneity with other index types.
@@ -101,16 +102,17 @@ class RangeIndex(NumericIndex):
101102
--------
102103
Index : The base pandas Index type.
103104
Int64Index : Index of int64 data.
105+
Float64Index : Index of float64 data
104106
"""
105107

106108
_typ = "rangeindex"
107-
_dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")
108-
_range: range
109+
_dtype_validation_metadata = (is_numeric_dtype, "numeric type")
110+
_range: range | float_range
109111
_is_backward_compat_public_numeric_index: bool = False
110112

111113
@property
def _engine_type(self) -> type[libindex.IndexEngine]:
    # Float-valued ranges are looked up with the float64 engine; every other
    # range keeps using the int64 engine.
    if is_float(self.start):
        return libindex.Float64Engine
    return libindex.Int64Engine
114116

115117
# --------------------------------------------------------------------
116118
# Constructors
@@ -135,25 +137,36 @@ def __new__(
135137

136138
# validate the arguments
137139
if com.all_none(start, stop, step):
138-
raise TypeError("RangeIndex(...) must be called with integers")
140+
raise TypeError("RangeIndex(...) must be called with integers/floats")
139141

140-
start = ensure_python_int(start) if start is not None else 0
142+
# check if any of the arguments is a float
143+
coerce_float = any(is_float(attr) for attr in [start, stop, step])
144+
145+
start = ensure_python_intfloat(start, coerce_float) if start is not None else 0
141146

142147
if stop is None:
143-
start, stop = 0, start
148+
start, stop = 0.0 if coerce_float else 0, start
144149
else:
145-
stop = ensure_python_int(stop)
146-
147-
step = ensure_python_int(step) if step is not None else 1
150+
stop = ensure_python_intfloat(stop, coerce_float)
151+
152+
step = (
153+
ensure_python_intfloat(step, coerce_float)
154+
if step is not None
155+
else 1.0
156+
if coerce_float
157+
else 1
158+
)
148159
if step == 0:
149160
raise ValueError("Step must not be zero")
150161

151-
rng = range(start, stop, step)
162+
rng = (
163+
float_range(start, stop, step) if coerce_float else range(start, stop, step)
164+
)
152165
return cls._simple_new(rng, name=name)
153166

154167
@classmethod
155168
def from_range(
156-
cls, data: range, name=None, dtype: Dtype | None = None
169+
cls, data: range | float_range, name=None, dtype: Dtype | None = None
157170
) -> RangeIndex:
158171
"""
159172
Create RangeIndex from a range object.
@@ -162,7 +175,7 @@ def from_range(
162175
-------
163176
RangeIndex
164177
"""
165-
if not isinstance(data, range):
178+
if not isinstance(data, range) and not isinstance(data, float_range):
166179
raise TypeError(
167180
f"{cls.__name__}(...) must be called with object coercible to a "
168181
f"range, {repr(data)} was passed"
@@ -171,10 +184,12 @@ def from_range(
171184
return cls._simple_new(data, name=name)
172185

173186
@classmethod
174-
def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
187+
def _simple_new(
188+
cls, values: range | float_range, name: Hashable = None
189+
) -> RangeIndex:
175190
result = object.__new__(cls)
176191

177-
assert isinstance(values, range)
192+
assert isinstance(values, range) or isinstance(values, float_range)
178193

179194
result._range = values
180195
result._name = name
@@ -184,12 +199,12 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
184199

185200
# --------------------------------------------------------------------
186201

187-
# error: Return type "Type[Int64Index]" of "_constructor" incompatible with return
202+
# error: Return type "Type[NumericIndex]" of "_constructor" incompatible with return
188203
# type "Type[RangeIndex]" in supertype "Index"
189204
@cache_readonly
def _constructor(self) -> type[NumericIndex]:  # type: ignore[override]
    """Return the Index class used when materialising this range."""
    # A float start selects Float64Index, anything else Int64Index.
    if is_float(self.start):
        return Float64Index
    return Int64Index
193208

194209
# error: Signature of "_data" incompatible with supertype "Index"
195210
@cache_readonly
@@ -199,7 +214,12 @@ def _data(self) -> np.ndarray: # type: ignore[override]
199214
200215
The constructed array is saved in ``_cache``.
201216
"""
202-
return np.arange(self.start, self.stop, self.step, dtype=np.int64)
217+
return np.arange(
218+
self.start,
219+
self.stop,
220+
self.step,
221+
dtype=np.float64 if is_float(self.start) else np.int64,
222+
)
203223

204224
def _get_data_as_items(self):
205225
"""return a list of tuples of start, stop, step"""
@@ -352,7 +372,7 @@ def memory_usage(self, deep: bool = False) -> int:
352372

353373
@property
def dtype(self) -> np.dtype:
    """numpy dtype of the values: float64 when ``start`` is a float, else int64."""
    if is_float(self.start):
        return np.dtype(np.float64)
    return np.dtype(np.int64)
356376

357377
@property
358378
def is_unique(self) -> bool:
@@ -370,14 +390,14 @@ def is_monotonic_decreasing(self) -> bool:
370390
def __contains__(self, key: Any) -> bool:
    """
    Return True if ``key`` is one of the values of the underlying range.

    Unhashable keys raise TypeError (via ``hash``); keys that are not
    int/float scalars, or that cannot be represented in the range's value
    type without changing value, are reported as not contained.
    """
    hash(key)
    # Only coerce to float for float ranges. Coercing keys for an integer
    # ``range`` would make ``in`` fall back to a linear scan (range only has
    # O(1) membership for ints) and loses precision for large integers.
    coerce_float = isinstance(self._range, float_range)
    try:
        coerced = ensure_python_intfloat(key, coerce_float)
    except (TypeError, ValueError, OverflowError, AssertionError):
        # Any failed conversion (wrong type, NaN/inf, lossy int cast) means
        # the key cannot be an element.
        return False
    if not coerce_float and coerced != key:
        # Guard against a lossy int conversion, e.g. 1.5 -> 1.
        return False
    return coerced in self._range
377397

378398
@property
def inferred_type(self) -> str:
    """Kind of values held: "float" when ``start`` is a float, else "integer"."""
    if is_float(self.start):
        return "float"
    return "integer"
381401

382402
# --------------------------------------------------------------------
383403
# Indexing Methods

pandas/tests/indexes/ranges/test_constructors.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,6 @@ def test_constructor_corner(self):
153153

154154
tm.assert_index_equal(index, expected, exact="equiv")
155155

156-
# non-int raise Exception
156+
# non-int/float raise Exception
157157
with pytest.raises(TypeError, match=r"Wrong type \<class 'str'\>"):
158158
RangeIndex("1", "10", "1")
159-
with pytest.raises(TypeError, match=r"Wrong type \<class 'float'\>"):
160-
RangeIndex(1.1, 10.2, 1.3)
161-
162-
# invalid passed type
163-
with pytest.raises(
164-
ValueError,
165-
match="Incorrect `dtype` passed: expected signed integer, received float64",
166-
):
167-
RangeIndex(1, 5, dtype="float64")

0 commit comments

Comments
 (0)