Skip to content

Commit 86090bf

Browse files
committed
fix float casting
1 parent c75461c commit 86090bf

File tree

3 files changed

+18
-33
lines changed

3 files changed

+18
-33
lines changed

pandas/core/arrays/sparse.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
infer_dtype_from_scalar)
2626
from pandas.core.dtypes.common import (
2727
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
28-
is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
28+
is_float_dtype, is_integer, is_integer_dtype, is_object_dtype, is_scalar,
29+
is_string_dtype, pandas_dtype)
2930
from pandas.core.dtypes.dtypes import register_extension_dtype
3031
from pandas.core.dtypes.generic import (
3132
ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -1927,15 +1928,24 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
19271928
index = _make_index(length, indices, kind)
19281929
sparsified_values = arr[mask]
19291930

1930-
# careful about casting here as we could easily specify a type that
1931-
# cannot hold the resulting values, e.g. integer when we have floats
1932-
# if we don't have an object specified then use this as the cast
19331931
if dtype is not None:
19341932

1935-
ok_to_cast = all(not (is_object_dtype(t) or is_bool_dtype(t))
1936-
for t in (dtype, sparsified_values.dtype))
1937-
if ok_to_cast:
1933+
# careful about casting here as we could easily specify a type that
1934+
# cannot hold the resulting values, e.g. integer when we have floats
1935+
# if this is not safe then convert the dtype; note that if there are
1936+
# NaNs in the source array this will raise
1937+
1938+
# TODO: ideally this would be done by 'safe' casting in astype_nansafe
1939+
# but alas too many cases rely upon this working in the current way
1940+
# and casting='safe' doesn't really work properly in numpy
1941+
if is_integer_dtype(dtype) and is_float_dtype(sparsified_values.dtype):
1942+
result = astype_nansafe(
1943+
sparsified_values, dtype=dtype)
1944+
if np.allclose(result, sparsified_values, rtol=0):
1945+
return result, index, fill_value
1946+
19381947
dtype = find_common_type([dtype, sparsified_values.dtype])
1948+
19391949
sparsified_values = astype_nansafe(
19401950
sparsified_values, dtype=dtype)
19411951

pandas/tests/arrays/sparse/test_array.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ def test_astype(self):
472472
# float -> float
473473
arr = SparseArray([None, None, 0, 2])
474474
result = arr.astype("Sparse[float32]")
475+
475476
expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
476477
tm.assert_sp_array_equal(result, expected)
477478

pandas/tests/extension/test_sparse.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -154,32 +154,6 @@ def test_reindex(self, data, na_value):
154154
self._check_unsupported(data)
155155
super().test_reindex(data, na_value)
156156

157-
def test_getitem_mask(self, data):
158-
# Empty mask, raw array
159-
mask = np.zeros(len(data), dtype=bool)
160-
result = data[mask]
161-
assert len(result) == 0
162-
assert isinstance(result, type(data))
163-
164-
# Empty mask, in series
165-
mask = np.zeros(len(data), dtype=bool)
166-
result = pd.Series(data)[mask]
167-
assert len(result) == 0
168-
169-
# we change int -> float because of the masking
170-
assert result.dtype == SparseDtype('float64', data.dtype.fill_value)
171-
172-
# non-empty mask, raw array
173-
mask[0] = True
174-
result = data[mask]
175-
assert len(result) == 1
176-
assert isinstance(result, type(data))
177-
178-
# non-empty mask, in series
179-
result = pd.Series(data)[mask]
180-
assert len(result) == 1
181-
assert result.dtype == data.dtype
182-
183157

184158
# Skipping TestSetitem, since we don't implement it.
185159

0 commit comments

Comments
 (0)