diff --git a/doc/source/api.rst b/doc/source/api.rst index 149421bde28c8..7fbb432f0be6b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -634,6 +634,14 @@ Serialization / IO / Conversion Series.to_string Series.to_clipboard +Sparse methods +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + SparseSeries.to_coo + SparseSeries.from_coo + .. _api.dataframe: DataFrame diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 391aae1cd9105..e72ee6b709282 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -109,10 +109,9 @@ accept scalar values or any 1-dimensional sequence: .. ipython:: python :suppress: - from numpy import nan - .. ipython:: python + from numpy import nan spl.append(np.array([1., nan, nan, 2., 3.])) spl.append(5) spl.append(sparr) @@ -135,3 +134,92 @@ recommend using ``block`` as it's more memory efficient. The ``integer`` format keeps an arrays of all of the locations where the data are not equal to the fill value. The ``block`` format tracks only the locations and sizes of blocks of data. + +.. _sparse.scipysparse: + +Interaction with scipy.sparse +----------------------------- + +Experimental api to transform between sparse pandas and scipy.sparse structures. + +A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. + +The method requires a ``MultiIndex`` with two or more levels. + +.. ipython:: python + :suppress: + + +.. 
ipython:: python + + from numpy import nan + s = Series([3.0, nan, 1.0, 3.0, nan, nan]) + s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + # SparseSeries + ss = s.to_sparse() + ss + +In the example below, we transform the ``SparseSeries`` to a sparse representation of a 2-d array by specifying that the first and second ``MultiIndex`` levels define labels for the rows and the third and fourth levels define labels for the columns. We also specify that the column and row labels should be sorted in the final sparse representation. + +.. ipython:: python + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=True) + + A + A.todense() + rows + columns + +Specifying different row and column labels (and not sorting them) yields a different sparse matrix: + +.. ipython:: python + + A, rows, columns = ss.to_coo(row_levels=['A', 'B', 'C'], + column_levels=['D'], + sort_labels=False) + + A + A.todense() + rows + columns + +A convenience method :meth:`SparseSeries.from_coo` is implemented for creating a ``SparseSeries`` from a ``scipy.sparse.coo_matrix``. + +.. ipython:: python + :suppress: + +.. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + +The default behaviour (with ``dense_index=False``) simply returns a ``SparseSeries`` containing +only the non-null entries. + +.. ipython:: python + + ss = SparseSeries.from_coo(A) + ss + +Specifying ``dense_index=True`` will result in an index that is the Cartesian product of the +row and column coordinates of the matrix. Note that this will consume a significant amount of memory +(relative to ``dense_index=False``) if the sparse matrix is large (and sparse) enough. + +.. 
ipython:: python + + ss_dense = SparseSeries.from_coo(A, dense_index=True) + ss_dense + diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index bbc006b41a433..ded4040010683 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -194,6 +194,55 @@ Enhancements - ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`) - Added ``StringMethods.zfill()`` which behave as the same as standard ``str`` (:issue:`9387`) +Interaction with scipy.sparse +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods + (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` + instances (see :ref:`here <sparse.scipysparse>`). + For example, given a SparseSeries with MultiIndex we can convert to a + ``scipy.sparse.coo_matrix`` by specifying the row and column labels as + index levels: + + .. ipython:: python + + from numpy import nan + s = Series([3.0, nan, 1.0, 3.0, nan, nan]) + s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + # SparseSeries + ss = s.to_sparse() + ss + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=False) + + A + A.todense() + rows + columns + + The from_coo method is a convenience method for creating a ``SparseSeries`` + from a ``scipy.sparse.coo_matrix``: + + .. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + + ss = SparseSeries.from_coo(A) + ss + Performance ~~~~~~~~~~~ diff --git a/pandas/sparse/scipy_sparse.py b/pandas/sparse/scipy_sparse.py new file mode 100644 index 0000000000000..91ec26396b3ec --- /dev/null +++ b/pandas/sparse/scipy_sparse.py @@ -0,0 +1,133 @@ +""" +Interaction with scipy.sparse matrices. 
+ +Currently only includes SparseSeries.to_coo helpers. +""" +from pandas.core.frame import DataFrame +from pandas.core.index import MultiIndex, Index +from pandas.core.series import Series +import itertools +import numpy as np +from pandas.compat import OrderedDict +from pandas.tools.util import cartesian_product + + +def _check_is_partition(parts, whole): + whole = set(whole) + parts = [set(x) for x in parts] + if set.intersection(*parts) != set(): + raise ValueError( + 'Is not a partition because intersection is not null.') + if set.union(*parts) != whole: + raise ValueError('Is not a partition becuase union is not the whole.') + + +def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ For arbitrary (MultiIndexed) SparseSeries return + (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for + passing to scipy.sparse.coo constructor. """ + # index and column levels must be a partition of the index + _check_is_partition([row_levels, column_levels], range(ss.index.nlevels)) + + # from the SparseSeries: get the labels and data for non-null entries + values = ss._data.values._valid_sp_values + + nonnull_labels = ss.dropna() + + def get_indexers(levels): + """ Return sparse coords and dense labels for subset levels """ + + # TODO: how to do this better? cleanly slice nonnull_labels given the + # coord + values_ilabels = [tuple(x[i] for i in levels) + for x in nonnull_labels.index] + if len(levels) == 1: + values_ilabels = [x[0] for x in values_ilabels] + + ####################################################################### + # # performance issues with groupby ################################### + # TODO: these two lines can rejplace the code below but + # groupby is too slow (in some cases at least) + # labels_to_i = ss.groupby(level=levels, sort=sort_labels).first() + # labels_to_i[:] = np.arange(labels_to_i.shape[0]) + + def _get_label_to_i_dict(labels, sort_labels=False): + """ Return OrderedDict of unique labels to number. 
+ Optionally sort by label. """ + labels = Index(map(tuple, labels)).unique().tolist() # squish + if sort_labels: + labels = sorted(list(labels)) + d = OrderedDict((k, i) for i, k in enumerate(labels)) + return(d) + + def _get_index_subset_to_coord_dict(index, subset, sort_labels=False): + def robust_get_level_values(i): + # if index has labels (that are not None) use those, + # else use the level location + try: + return(index.get_level_values(index.names[i])) + except KeyError: + return(index.get_level_values(i)) + ilabels = list( + zip(*[robust_get_level_values(i) for i in subset])) + labels_to_i = _get_label_to_i_dict( + ilabels, sort_labels=sort_labels) + labels_to_i = Series(labels_to_i) + labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) + labels_to_i.index.names = [index.names[i] for i in subset] + labels_to_i.name = 'value' + return(labels_to_i) + + labels_to_i = _get_index_subset_to_coord_dict( + ss.index, levels, sort_labels=sort_labels) + ####################################################################### + ####################################################################### + + i_coord = labels_to_i[values_ilabels].tolist() + i_labels = labels_to_i.index.tolist() + + return i_coord, i_labels + + i_coord, i_labels = get_indexers(row_levels) + j_coord, j_labels = get_indexers(column_levels) + + return values, i_coord, j_coord, i_labels, j_labels + + +def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ Convert a SparseSeries to a scipy.sparse.coo_matrix using index + levels row_levels, column_levels as the row and column + labels respectively. Returns the sparse_matrix, row and column labels. 
""" + + import scipy.sparse + + if ss.index.nlevels < 2: + raise ValueError('to_coo requires MultiIndex with nlevels > 2') + if not ss.index.is_unique: + raise ValueError( + 'Duplicate index entries are not allowed in to_coo transformation.') + + # to keep things simple, only rely on integer indexing (not labels) + row_levels = [ss.index._get_level_number(x) for x in row_levels] + column_levels = [ss.index._get_level_number(x) for x in column_levels] + + v, i, j, rows, columns = _to_ijv( + ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels) + sparse_matrix = scipy.sparse.coo_matrix( + (v, (i, j)), shape=(len(rows), len(columns))) + return sparse_matrix, rows, columns + + +def _coo_to_sparse_series(A, dense_index=False): + """ Convert a scipy.sparse.coo_matrix to a SparseSeries. + Use the defaults given in the SparseSeries constructor. """ + s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + s = s.sort_index() + s = s.to_sparse() # TODO: specify kind? + if dense_index: + # is there a better constructor method to use here? + i = range(A.shape[0]) + j = range(A.shape[1]) + ind = MultiIndex.from_product([i, j]) + s = s.reindex_axis(ind) + return s diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index bcf9606c3748f..2c328e51b5090 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -29,12 +29,14 @@ from pandas.util.decorators import Appender +from pandas.sparse.scipy_sparse import _sparse_series_to_coo, _coo_to_sparse_series + #------------------------------------------------------------------------------ # Wrapper function for Series arithmetic methods def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, - **eval_kwargs): + **eval_kwargs): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
@@ -115,7 +117,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', if copy: data = data.copy() else: - + if data is None: data = [] @@ -657,6 +659,98 @@ def combine_first(self, other): dense_combined = self.to_dense().combine_first(other) return dense_combined.to_sparse(fill_value=self.fill_value) + def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ + Create a scipy.sparse.coo_matrix from a SparseSeries with MultiIndex. + + Use row_levels and column_levels to determine the row and column coordinates respectively. + row_levels and column_levels are the names (labels) or numbers of the levels. + {row_levels, column_levels} must be a partition of the MultiIndex level names (or numbers). + + Parameters + ---------- + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. + + Returns + ------- + y : scipy.sparse.coo_matrix + rows : list (row labels) + columns : list (column labels) + + Examples + -------- + >>> from numpy import nan + >>> s = Series([3.0, nan, 1.0, 3.0, nan, nan]) + >>> s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + >>> ss = s.to_sparse() + >>> A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=True) + >>> A + <3x4 sparse matrix of type '<type 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[ 0., 0., 1., 3.], + [ 3., 0., 0., 0.], + [ 0., 0., 0., 0.]]) + >>> rows + [(1, 1), (1, 2), (2, 1)] + >>> columns + [('a', 0), ('a', 1), ('b', 0), ('b', 1)] + """ + A, rows, columns = _sparse_series_to_coo( + self, row_levels, column_levels, sort_labels=sort_labels) + return A, rows, columns + + @classmethod + def from_coo(cls, A, dense_index=False): + """ + Create a SparseSeries from a scipy.sparse.coo_matrix. 
+ + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + If False (default), the SparseSeries index consists of only the coords of the non-null entries of the original coo_matrix. + If True, the SparseSeries index consists of the full sorted (row, col) coordinates of the coo_matrix. + + Returns + ------- + s : SparseSeries + + Examples + -------- + >>> from scipy import sparse + >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + >>> A + <3x4 sparse matrix of type '<type 'numpy.float64'>' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[ 0., 0., 1., 2.], + [ 3., 0., 0., 0.], + [ 0., 0., 0., 0.]]) + >>> ss = SparseSeries.from_coo(A) + >>> ss + 0 2 1 + 3 2 + 1 0 3 + dtype: float64 + BlockIndex + Block locations: array([0], dtype=int32) + Block lengths: array([3], dtype=int32) + """ + return _coo_to_sparse_series(A, dense_index=dense_index) + # overwrite series methods with unaccelerated versions ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, **ops.series_special_funcs) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index eebe822ae74c0..b0cd81ce4d111 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -2,6 +2,7 @@ import operator from datetime import datetime +import functools import nose @@ -11,10 +12,10 @@ dec = np.testing.dec from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assert_panel_equal, assertRaisesRegexp) + assert_frame_equal, assert_panel_equal, assertRaisesRegexp, assert_array_equal) from numpy.testing import assert_equal -from pandas import Series, DataFrame, bdate_range, Panel +from pandas import Series, DataFrame, bdate_range, Panel, MultiIndex from pandas.core.datetools import BDay from pandas.core.index import Index from pandas.tseries.index import DatetimeIndex @@ -23,6 +24,7 @@ import pandas.util.testing as tm from 
pandas.compat import range, lrange, StringIO, lrange from pandas import compat +from pandas.tools.util import cartesian_product import pandas.sparse.frame as spf @@ -30,7 +32,6 @@ from pandas.sparse.api import (SparseSeries, SparseTimeSeries, SparseDataFrame, SparsePanel, SparseArray) - import pandas.tests.test_frame as test_frame import pandas.tests.test_panel as test_panel import pandas.tests.test_series as test_series @@ -168,7 +169,7 @@ def test_construct_DataFrame_with_sp_series(self): assert_sp_series_equal(df['col'], self.bseries) - result = df.iloc[:,0] + result = df.iloc[:, 0] assert_sp_series_equal(result, self.bseries) # blocking @@ -748,6 +749,126 @@ def test_combine_first(self): assert_sp_series_equal(result, expected) +class TestSparseSeriesScipyInteraction(tm.TestCase): + # Issue 8048: add SparseSeries coo methods + + def setUp(self): + tm._skip_if_no_scipy() + import scipy.sparse + # SparseSeries inputs used in tests, the tests rely on the order + self.sparse_series = [] + s = pd.Series([3.0, nan, 1.0, 2.0, nan, nan]) + s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + self.sparse_series.append(s.to_sparse()) + + ss = self.sparse_series[0].copy() + ss.index.names = [3, 0, 1, 2] + self.sparse_series.append(ss) + + ss = pd.Series( + [nan] * 12, index=cartesian_product((range(3), range(4)))).to_sparse() + for k, v in zip([(0, 0), (1, 2), (1, 3)], [3.0, 1.0, 2.0]): + ss[k] = v + self.sparse_series.append(ss) + + # results used in tests + self.coo_matrices = [] + self.coo_matrices.append(scipy.sparse.coo_matrix( + ([3.0, 1.0, 2.0], ([0, 1, 1], [0, 2, 3])), shape=(3, 4))) + self.coo_matrices.append(scipy.sparse.coo_matrix( + ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4))) + self.ils = [[(1, 2), (1, 1), (2, 1)], [(1, 1), (1, 2), (2, 1)]] + self.jls = [[('a', 0), ('a', 1), ('b', 0), ('b', 1)]] + + def 
test_to_coo_text_names_integer_row_levels_nosort(self): + ss = self.sparse_series[0] + kwargs = {'row_levels': [0, 1], 'column_levels': [2, 3]} + result = (self.coo_matrices[0], self.ils[0], self.jls[0]) + self._run_test(ss, kwargs, result) + + def test_to_coo_text_names_integer_row_levels_sort(self): + ss = self.sparse_series[0] + kwargs = {'row_levels': [0, 1], + 'column_levels': [2, 3], 'sort_labels': True} + result = (self.coo_matrices[1], self.ils[1], self.jls[0]) + self._run_test(ss, kwargs, result) + + def test_to_coo_integer_names_integer_row_levels_nosort(self): + ss = self.sparse_series[1] + kwargs = {'row_levels': [3, 0], 'column_levels': [1, 2]} + result = (self.coo_matrices[0], self.ils[0], self.jls[0]) + self._run_test(ss, kwargs, result) + + def test_to_coo_text_names_text_row_levels_nosort(self): + ss = self.sparse_series[0] + kwargs = {'row_levels': ['A', 'B'], 'column_levels': ['C', 'D']} + result = (self.coo_matrices[0], self.ils[0], self.jls[0]) + self._run_test(ss, kwargs, result) + + def test_to_coo_bad_partition_nonnull_intersection(self): + ss = self.sparse_series[0] + self.assertRaises(ValueError, ss.to_coo, ['A', 'B', 'C'], ['C', 'D']) + + def test_to_coo_bad_partition_small_union(self): + ss = self.sparse_series[0] + self.assertRaises(ValueError, ss.to_coo, ['A'], ['C', 'D']) + + def test_to_coo_nlevels_less_than_two(self): + ss = self.sparse_series[0] + ss.index = np.arange(len(ss.index)) + self.assertRaises(ValueError, ss.to_coo) + + def test_to_coo_bad_ilevel(self): + ss = self.sparse_series[0] + self.assertRaises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) + + def test_to_coo_duplicate_index_entries(self): + ss = pd.concat( + [self.sparse_series[0], self.sparse_series[0]]).to_sparse() + self.assertRaises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) + + def test_from_coo_dense_index(self): + ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True) + check = self.sparse_series[2] + assert_sp_series_equal(ss, check) + + 
def test_from_coo_nodense_index(self): + ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=False) + check = self.sparse_series[2] + check = check.dropna().to_sparse() + assert_sp_series_equal(ss, check) + + def _run_test(self, ss, kwargs, check): + results = ss.to_coo(**kwargs) + self._check_results_to_coo(results, check) + # for every test, also test symmetry property (transpose), switch + # row_levels and column_levels + d = kwargs.copy() + d['row_levels'] = kwargs['column_levels'] + d['column_levels'] = kwargs['row_levels'] + results = ss.to_coo(**d) + results = (results[0].T, results[2], results[1]) + self._check_results_to_coo(results, check) + + @staticmethod + def _check_results_to_coo(results, check): + (A, il, jl) = results + (A_result, il_result, jl_result) = check + # convert to dense and compare + assert_array_equal(A.todense(), A_result.todense()) + # or compare directly as difference of sparse + # assert(abs(A - A_result).max() < 1e-12) # max is failing in python + # 2.6 + assert_equal(il, il_result) + assert_equal(jl, jl_result) + + class TestSparseTimeSeries(tm.TestCase): pass @@ -888,9 +1009,9 @@ def test_constructor_from_series(self): # GH 2873 x = Series(np.random.randn(10000), name='a') x = x.to_sparse(fill_value=0) - tm.assert_isinstance(x,SparseSeries) + tm.assert_isinstance(x, SparseSeries) df = SparseDataFrame(x) - tm.assert_isinstance(df,SparseDataFrame) + tm.assert_isinstance(df, SparseDataFrame) x = Series(np.random.randn(10000), name='a') y = Series(np.random.randn(10000), name='b') @@ -1090,7 +1211,7 @@ def test_icol(self): data = {'A': [0, 1]} iframe = SparseDataFrame(data, default_kind='integer') self.assertEqual(type(iframe['A'].sp_index), - type(iframe.icol(0).sp_index)) + type(iframe.icol(0).sp_index)) def test_set_value(self): diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index 334534ed466f2..77e9044e2a40c 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -500,9 +500,9 @@ def 
get_data(n=100000): frame_from_records_generator = Benchmark('df = DataFrame.from_records(get_data())', setup, name='frame_from_records_generator', - start_date=datetime(2013,10,04)) # issue-4911 + start_date=datetime(2013,10,4)) # issue-4911 frame_from_records_generator_nrows = Benchmark('df = DataFrame.from_records(get_data(), nrows=1000)', setup, name='frame_from_records_generator_nrows', - start_date=datetime(2013,10,04)) # issue-4911 + start_date=datetime(2013,10,4)) # issue-4911 diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py index 1cb0f9233f7e9..e591b197d3384 100644 --- a/vb_suite/sparse.py +++ b/vb_suite/sparse.py @@ -37,3 +37,29 @@ sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", start_date=datetime(2012, 6, 1)) + + +setup = common_setup + """ +s = pd.Series([nan] * 10000) +s[0] = 3.0 +s[100] = -1.0 +s[999] = 12.1 +s.index = pd.MultiIndex.from_product((range(10), range(10), range(10), range(10))) +ss = s.to_sparse() +""" + +stmt = "ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)" + +sparse_series_to_coo = Benchmark(stmt, setup, name="sparse_series_to_coo", + start_date=datetime(2015, 1, 3)) + +setup = common_setup + """ +import scipy.sparse +import pandas.sparse.series +A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) +""" + +stmt = "ss = pandas.sparse.series.from_coo(A)" + +sparse_series_from_coo = Benchmark(stmt, setup, name="sparse_series_from_coo", + start_date=datetime(2015, 1, 3)) diff --git a/vb_suite/stat_ops.py b/vb_suite/stat_ops.py index f4ea6706c193c..544ad6d00ed37 100644 --- a/vb_suite/stat_ops.py +++ b/vb_suite/stat_ops.py @@ -86,9 +86,9 @@ start_date=datetime(2011, 12, 12)) stats_rank_pct_average = Benchmark('s.rank(pct=True)', setup, - start_date=datetime(2014, 01, 16)) + start_date=datetime(2014, 1, 16)) stats_rank_pct_average_old = Benchmark('s.rank() / len(s)', setup, - start_date=datetime(2014, 01, 16)) + start_date=datetime(2014, 1, 16)) setup 
= common_setup + """ values = np.random.randint(0, 100000, size=200000) s = Series(values)