Skip to content

Commit 029f707

Browse files
authored
Disallow lossy SparseArray conversion (#32501)
1 parent 8111d64 commit 029f707

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

pandas/core/arrays/sparse/array.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
is_array_like,
2828
is_bool_dtype,
2929
is_datetime64_any_dtype,
30+
is_datetime64tz_dtype,
3031
is_dtype_equal,
3132
is_integer,
3233
is_object_dtype,
@@ -42,7 +43,7 @@
4243
from pandas.core.arrays.sparse.dtype import SparseDtype
4344
from pandas.core.base import PandasObject
4445
import pandas.core.common as com
45-
from pandas.core.construction import sanitize_array
46+
from pandas.core.construction import extract_array, sanitize_array
4647
from pandas.core.indexers import check_array_indexer
4748
from pandas.core.missing import interpolate_2d
4849
import pandas.core.ops as ops
@@ -312,7 +313,7 @@ def __init__(
312313
dtype = dtype.subtype
313314

314315
if index is not None and not is_scalar(data):
315-
raise Exception("must only pass scalars with an index ")
316+
raise Exception("must only pass scalars with an index")
316317

317318
if is_scalar(data):
318319
if index is not None:
@@ -367,6 +368,19 @@ def __init__(
367368
sparse_index = data._sparse_index
368369
sparse_values = np.asarray(data.sp_values, dtype=dtype)
369370
elif sparse_index is None:
371+
data = extract_array(data, extract_numpy=True)
372+
if not isinstance(data, np.ndarray):
373+
# EA
374+
if is_datetime64tz_dtype(data.dtype):
375+
warnings.warn(
376+
f"Creating SparseArray from {data.dtype} data "
377+
"loses timezone information. Cast to object before "
378+
"sparse to retain timezone information.",
379+
UserWarning,
380+
stacklevel=2,
381+
)
382+
data = np.asarray(data, dtype="datetime64[ns]")
383+
data = np.asarray(data)
370384
sparse_values, sparse_index, fill_value = make_sparse(
371385
data, kind=kind, fill_value=fill_value, dtype=dtype
372386
)
@@ -1497,7 +1511,7 @@ def _formatter(self, boxed=False):
14971511
SparseArray._add_unary_ops()
14981512

14991513

1500-
def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False):
1514+
def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None, copy=False):
15011515
"""
15021516
Convert ndarray to sparse format
15031517
@@ -1513,7 +1527,7 @@ def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False):
15131527
-------
15141528
(sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)
15151529
"""
1516-
arr = com.values_from_object(arr)
1530+
assert isinstance(arr, np.ndarray)
15171531

15181532
if arr.ndim > 1:
15191533
raise TypeError("expected dimension <= 1 data")

pandas/tests/arrays/sparse/test_array.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,22 @@ def test_constructor_na_dtype(self, dtype):
9696
with pytest.raises(ValueError, match="Cannot convert"):
9797
SparseArray([0, 1, np.nan], dtype=dtype)
9898

99+
def test_constructor_warns_when_losing_timezone(self):
100+
# GH#32501 warn when losing timezone information
101+
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
102+
103+
expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
104+
105+
with tm.assert_produces_warning(UserWarning):
106+
result = SparseArray(dti)
107+
108+
tm.assert_sp_array_equal(result, expected)
109+
110+
with tm.assert_produces_warning(UserWarning):
111+
result = SparseArray(pd.Series(dti))
112+
113+
tm.assert_sp_array_equal(result, expected)
114+
99115
def test_constructor_spindex_dtype(self):
100116
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
101117
# XXX: Behavior change: specifying SparseIndex no longer changes the

0 commit comments

Comments
 (0)