Skip to content

PEP: pandas/sparse cleanup #12116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/sparse/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# pylint: disable=W0611

# flake8: noqa
from pandas.sparse.array import SparseArray
from pandas.sparse.list import SparseList
from pandas.sparse.series import SparseSeries, SparseTimeSeries
Expand Down
107 changes: 51 additions & 56 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
import pandas.core.ops as ops


def _arith_method(op, name, str_rep=None, default_axis=None,
fill_zeros=None, **eval_kwargs):
def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None,
**eval_kwargs):
"""
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""

def wrapper(self, other):
if isinstance(other, np.ndarray):
if len(self) != len(other):
Expand All @@ -37,14 +38,14 @@ def wrapper(self, other):
else:
return _sparse_array_op(self, other, op, name)
elif np.isscalar(other):
new_fill_value = op(np.float64(self.fill_value),
np.float64(other))
new_fill_value = op(np.float64(self.fill_value), np.float64(other))

return SparseArray(op(self.sp_values, other),
sparse_index=self.sp_index,
fill_value=new_fill_value)
else: # pragma: no cover
raise TypeError('operation with %s not supported' % type(other))

if name.startswith("__"):
name = name[2:-2]
wrapper.__name__ = name
Expand Down Expand Up @@ -74,44 +75,38 @@ def _sparse_array_op(left, right, op, name):

def _sparse_nanop(this, other, name):
sparse_op = getattr(splib, 'sparse_nan%s' % name)
result, result_index = sparse_op(this.sp_values,
this.sp_index,
other.sp_values,
other.sp_index)
result, result_index = sparse_op(this.sp_values, this.sp_index,
other.sp_values, other.sp_index)

return result, result_index


def _sparse_fillop(this, other, name):
sparse_op = getattr(splib, 'sparse_%s' % name)
result, result_index = sparse_op(this.sp_values,
this.sp_index,
this.fill_value,
other.sp_values,
other.sp_index,
other.fill_value)
result, result_index = sparse_op(this.sp_values, this.sp_index,
this.fill_value, other.sp_values,
other.sp_index, other.fill_value)

return result, result_index


class SparseArray(PandasObject, np.ndarray):

"""Data structure for labeled, sparse floating point data

Parameters
----------
data : {array-like, Series, SparseSeries, dict}
kind : {'block', 'integer'}
fill_value : float
Defaults to NaN (code for missing)
sparse_index : {BlockIndex, IntIndex}, optional
Only if you have one. Mainly used internally

Notes
-----
SparseArray objects are immutable via the typical Python means. If you
must change values, convert to dense, make your changes, then convert back
to sparse
Parameters
----------
data : {array-like, Series, SparseSeries, dict}
kind : {'block', 'integer'}
fill_value : float
Defaults to NaN (code for missing)
sparse_index : {BlockIndex, IntIndex}, optional
Only if you have one. Mainly used internally

Notes
-----
SparseArray objects are immutable via the typical Python means. If you
must change values, convert to dense, make your changes, then convert back
to sparse
"""
__array_priority__ = 15
_typ = 'array'
Expand All @@ -120,9 +115,8 @@ class SparseArray(PandasObject, np.ndarray):
sp_index = None
fill_value = None

def __new__(
cls, data, sparse_index=None, index=None, kind='integer', fill_value=None,
dtype=np.float64, copy=False):
def __new__(cls, data, sparse_index=None, index=None, kind='integer',
fill_value=None, dtype=np.float64, copy=False):

if index is not None:
if data is None:
Expand Down Expand Up @@ -164,7 +158,8 @@ def __new__(
subarr = np.asarray(values, dtype=dtype)

# if we have a bool type, make sure that we have a bool fill_value
if (dtype is not None and issubclass(dtype.type, np.bool_)) or (data is not None and lib.is_bool_array(subarr)):
if ((dtype is not None and issubclass(dtype.type, np.bool_)) or
(data is not None and lib.is_bool_array(subarr))):
if np.isnan(fill_value) or not fill_value:
fill_value = False
else:
Expand Down Expand Up @@ -284,9 +279,9 @@ def __getitem__(self, key):
else:
if isinstance(key, SparseArray):
key = np.asarray(key)
if hasattr(key,'__len__') and len(self) != len(key):
if hasattr(key, '__len__') and len(self) != len(key):
indices = self.sp_index
if hasattr(indices,'to_int_index'):
if hasattr(indices, 'to_int_index'):
indices = indices.to_int_index()
data_slice = self.values.take(indices.indices)[key]
else:
Expand Down Expand Up @@ -355,7 +350,8 @@ def __setitem__(self, key, value):
# if com.is_integer(key):
# self.values[key] = value
# else:
# raise Exception("SparseArray does not support setting non-scalars via setitem")
# raise Exception("SparseArray does not support setting non-scalars
# via setitem")
raise TypeError(
"SparseArray does not support item assignment via setitem")

Expand All @@ -364,16 +360,17 @@ def __setslice__(self, i, j, value):
i = 0
if j < 0:
j = 0
slobj = slice(i, j)
slobj = slice(i, j) # noqa

# if not np.isscalar(value):
# raise Exception("SparseArray does not support setting non-scalars via slices")
# raise Exception("SparseArray does not support setting non-scalars
# via slices")

#x = self.values
#x[slobj] = value
#self.values = x
raise TypeError(
"SparseArray does not support item assignment via slices")
# x = self.values
# x[slobj] = value
# self.values = x
raise TypeError("SparseArray does not support item assignment via "
"slices")

def astype(self, dtype=None):
"""
Expand All @@ -394,8 +391,7 @@ def copy(self, deep=True):
else:
values = self.sp_values
return SparseArray(values, sparse_index=self.sp_index,
dtype=self.dtype,
fill_value=self.fill_value)
dtype=self.dtype, fill_value=self.fill_value)

def count(self):
"""
Expand Down Expand Up @@ -453,8 +449,7 @@ def cumsum(self, axis=0, dtype=None, out=None):
if com.notnull(self.fill_value):
return self.to_dense().cumsum()
# TODO: what if sp_values contains NaN??
return SparseArray(self.sp_values.cumsum(),
sparse_index=self.sp_index,
return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index,
fill_value=self.fill_value)

def mean(self, axis=None, dtype=None, out=None):
Expand Down Expand Up @@ -485,8 +480,8 @@ def _maybe_to_dense(obj):

def _maybe_to_sparse(array):
if isinstance(array, com.ABCSparseSeries):
array = SparseArray(
array.values, sparse_index=array.sp_index, fill_value=array.fill_value, copy=True)
array = SparseArray(array.values, sparse_index=array.sp_index,
fill_value=array.fill_value, copy=True)
if not isinstance(array, SparseArray):
array = com._values_from_object(array)
return array
Expand Down Expand Up @@ -538,15 +533,15 @@ def make_sparse(arr, kind='block', fill_value=nan):
sparsified_values = arr[mask]
return sparsified_values, index

ops.add_special_arithmetic_methods(SparseArray,
arith_method=_arith_method,
use_numexpr=False)

ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method,
use_numexpr=False)


def _concat_compat(to_concat, axis=0):
"""
provide concatenation of a sparse/dense array of arrays each of which is a single dtype
provide concatenation of a sparse/dense array of arrays each of which is a
single dtype

Parameters
----------
Expand All @@ -570,10 +565,10 @@ def convert_sparse(x, axis):
typs = com.get_dtype_kinds(to_concat)

# we have more than one type here, so densify and regular concat
to_concat = [ convert_sparse(x, axis) for x in to_concat ]
result = np.concatenate(to_concat,axis=axis)
to_concat = [convert_sparse(x, axis) for x in to_concat]
result = np.concatenate(to_concat, axis=axis)

if not len(typs-set(['sparse','f','i'])):
if not len(typs - set(['sparse', 'f', 'i'])):

# we can remain sparse
result = SparseArray(result.ravel())
Expand Down
Loading