diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 69965f44d87a8..e8d2ec5eb0d9e 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -555,7 +555,7 @@ Sparse
 
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
--
+- Bug in :meth:`SparseArray.memory_usage` which caused a segfault by accessing non-sparse elements (:issue:`19368`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 54d25a16a10a3..d5b204dba063e 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1048,7 +1048,7 @@ def is_monotonic_decreasing(self):
 
     def memory_usage(self, deep=False):
         """
-        Memory usage of my values
+        Memory usage of the values
 
         Parameters
         ----------
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index fa07400a0706e..65aefd9fb8c0a 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -8,10 +8,10 @@
 import warnings
 
 import pandas as pd
-from pandas.core.base import PandasObject
+from pandas.core.base import PandasObject, IndexOpsMixin
 
 from pandas import compat
-from pandas.compat import range
+from pandas.compat import range, PYPY
 from pandas.compat.numpy import function as nv
 
 from pandas.core.dtypes.generic import ABCSparseSeries
@@ -30,6 +30,7 @@
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
+import pandas._libs.lib as lib
 from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex
 from pandas._libs import index as libindex
 import pandas.core.algorithms as algos
@@ -238,6 +239,21 @@ def kind(self):
         elif isinstance(self.sp_index, IntIndex):
             return 'integer'
 
+    @Appender(IndexOpsMixin.memory_usage.__doc__)
+    def memory_usage(self, deep=False):
+        # Measure only the explicitly stored values (``sp_values``);
+        # see GH 19368 for the segfault this avoids.
+        values = self.sp_values
+
+        v = values.nbytes
+
+        # With ``deep`` on an object dtype, add what
+        # ``lib.memory_usage_of_objects`` reports (skipped on PyPy).
+        if deep and is_object_dtype(self) and not PYPY:
+            v += lib.memory_usage_of_objects(values)
+
+        return v
+
     def __array_wrap__(self, out_arr, context=None):
         """
         NumPy calls this method when ufunc is applied
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 2ea1e63433520..3f5d5a59cc540 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -971,6 +971,20 @@ def test_combine_first(self):
         tm.assert_sp_series_equal(result, result2)
         tm.assert_sp_series_equal(result, expected)
 
+    @pytest.mark.parametrize('deep', [True, False])
+    @pytest.mark.parametrize('fill_value', [0, 1, np.nan, None])
+    def test_memory_usage_deep(self, deep, fill_value):
+        # GH 19368: for every (deep, fill_value) combination the sparse
+        # series must report less memory than its dense counterpart.
+        values = [0, 1, np.nan, None]
+        sparse_series = SparseSeries(values, fill_value=fill_value)
+        dense_series = Series(values)
+
+        sparse_usage = sparse_series.memory_usage(deep=deep)
+        dense_usage = dense_series.memory_usage(deep=deep)
+
+        assert sparse_usage < dense_usage
+
 
 class TestSparseHandlingMultiIndexes(object):
 