Skip to content

Commit e581b14

Browse files
mroeschke authored and jreback committed
CLN: ASV sparse (#19047)
1 parent 2ec0949 commit e581b14

File tree

1 file changed

+110
-159
lines changed

1 file changed

+110
-159
lines changed

asv_bench/benchmarks/sparse.py

Lines changed: 110 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -1,211 +1,162 @@
11
import itertools
22

3-
from .pandas_vb_common import *
3+
import numpy as np
44
import scipy.sparse
5-
from pandas import SparseSeries, SparseDataFrame, SparseArray
5+
from pandas import (SparseSeries, SparseDataFrame, SparseArray, Series,
6+
date_range, MultiIndex)
67

8+
from .pandas_vb_common import setup # noqa
79

8-
class sparse_series_to_frame(object):
9-
goal_time = 0.2
1010

11-
def setup(self):
12-
self.K = 50
13-
self.N = 50000
14-
self.rng = np.asarray(date_range('1/1/2000', periods=self.N, freq='T'))
15-
self.series = {}
16-
for i in range(1, (self.K + 1)):
17-
self.data = np.random.randn(self.N)[:(- i)]
18-
self.this_rng = self.rng[:(- i)]
19-
self.data[100:] = np.nan
20-
self.series[i] = SparseSeries(self.data, index=self.this_rng)
11+
def make_array(size, dense_proportion, fill_value, dtype):
    """Build a 1-D array of ``fill_value`` with random slots overwritten.

    Parameters
    ----------
    size : int
        Length of the returned array.
    dense_proportion : float
        Fraction of positions to overwrite; the number of positions is
        ``int(size * dense_proportion)``.
    fill_value : scalar
        Value every position starts with.
    dtype : dtype
        dtype of the returned array and of the pool of overwrite values.

    Returns
    -------
    numpy.ndarray
        Array of length ``size`` in which the chosen positions hold values
        drawn (with replacement) from ``0..99``.
    """
    n_dense = int(size * dense_proportion)
    result = np.full(size, fill_value, dtype)
    # Positions are sampled without replacement so exactly ``n_dense``
    # distinct slots are overwritten.
    positions = np.random.choice(np.arange(size), n_dense, replace=False)
    value_pool = np.arange(100, dtype=dtype)
    result[positions] = np.random.choice(value_pool, n_dense)
    return result
2117

22-
def time_sparse_series_to_frame(self):
23-
SparseDataFrame(self.series)
2418

19+
class SparseSeriesToFrame(object):
2520

26-
class sparse_array_constructor(object):
2721
goal_time = 0.2
2822

2923
def setup(self):
30-
np.random.seed(1)
31-
self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
32-
self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
33-
34-
self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
35-
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
36-
37-
self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
38-
self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
39-
40-
self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
41-
self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
42-
43-
def make_numeric_array(self, length, dense_size, fill_value, dtype):
44-
arr = np.array([fill_value] * length, dtype=dtype)
45-
indexer = np.unique(np.random.randint(0, length, dense_size))
46-
arr[indexer] = np.random.randint(0, 100, len(indexer))
47-
return (arr, fill_value, dtype)
48-
49-
def make_object_array(self, length, dense_size, fill_value):
50-
elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object)
51-
arr = np.array([fill_value] * length, dtype=np.object)
52-
indexer = np.unique(np.random.randint(0, length, dense_size))
53-
arr[indexer] = np.random.choice(elems, len(indexer))
54-
return (arr, fill_value, np.object)
55-
56-
def time_sparse_array_constructor_int64_10percent(self):
57-
arr, fill_value, dtype = self.int64_10percent
58-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
59-
60-
def time_sparse_array_constructor_int64_1percent(self):
61-
arr, fill_value, dtype = self.int64_1percent
62-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
63-
64-
def time_sparse_array_constructor_float64_10percent(self):
65-
arr, fill_value, dtype = self.float64_10percent
66-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
67-
68-
def time_sparse_array_constructor_float64_1percent(self):
69-
arr, fill_value, dtype = self.float64_1percent
70-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
71-
72-
def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
73-
arr, fill_value, dtype = self.object_nan_fill_value_10percent
74-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
75-
76-
def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
77-
arr, fill_value, dtype = self.object_nan_fill_value_1percent
78-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
24+
K = 50
25+
N = 50001
26+
rng = date_range('1/1/2000', periods=N, freq='T')
27+
self.series = {}
28+
for i in range(1, K):
29+
data = np.random.randn(N)[:-i]
30+
idx = rng[:-i]
31+
data[100:] = np.nan
32+
self.series[i] = SparseSeries(data, index=idx)
7933

80-
def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
81-
arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
82-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
34+
def time_series_to_frame(self):
    """Time building a ``SparseDataFrame`` from the dict in ``self.series``."""
    series_by_key = self.series
    SparseDataFrame(series_by_key)
8336

84-
def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
85-
arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
86-
SparseArray(arr, fill_value=fill_value, dtype=dtype)
8737

38+
class SparseArrayConstructor(object):
8839

89-
class sparse_frame_constructor(object):
9040
goal_time = 0.2
41+
# Parameter grid handed to ``setup`` and the timing method:
# (dense proportions, fill values, dtypes).
# The builtin ``object`` is used rather than ``np.object``: the latter was
# only an alias for the builtin and no longer exists in NumPy >= 1.24.
params = ([0.1, 0.01], [0, np.nan],
          [np.int64, np.float64, object])
param_names = ['dense_proportion', 'fill_value', 'dtype']
9144

92-
def time_sparse_frame_constructor(self):
93-
SparseDataFrame(columns=np.arange(100), index=np.arange(1000))
45+
def setup(self, dense_proportion, fill_value, dtype):
    """Prepare the dense input array for one parameter combination.

    A one-million-element array is built by ``make_array`` and stored on
    ``self.array`` for the timing method to consume.
    """
    n_elements = 10**6
    dense = make_array(n_elements, dense_proportion, fill_value, dtype)
    self.array = dense
9448

95-
def time_sparse_from_scipy(self):
96-
SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
49+
def time_sparse_array(self, dense_proportion, fill_value, dtype):
    """Time constructing a ``SparseArray`` from ``self.array``."""
    dense = self.array
    SparseArray(dense, fill_value=fill_value, dtype=dtype)
9751

98-
def time_sparse_from_dict(self):
99-
SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
10052

53+
class SparseDataFrameConstructor(object):
10154

102-
class sparse_series_from_coo(object):
10355
goal_time = 0.2
10456

10557
def setup(self):
106-
self.A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100))
58+
N = 1000
59+
self.arr = np.arange(N)
60+
self.sparse = scipy.sparse.rand(N, N, 0.005)
61+
self.dict = dict(zip(range(N), itertools.repeat([0])))
10762

108-
def time_sparse_series_from_coo(self):
109-
self.ss = SparseSeries.from_coo(self.A)
63+
def time_constructor(self):
    """Time creating a ``SparseDataFrame`` from labels only.

    ``self.arr`` is used for both the column labels and the index.
    """
    labels = self.arr
    SparseDataFrame(columns=labels, index=labels)
11065

66+
def time_from_scipy(self):
    """Time creating a ``SparseDataFrame`` from the object in ``self.sparse``."""
    matrix = self.sparse
    SparseDataFrame(matrix)
11168

112-
class sparse_series_to_coo(object):
113-
goal_time = 0.2
69+
def time_from_dict(self):
    """Time creating a ``SparseDataFrame`` from the mapping in ``self.dict``."""
    mapping = self.dict
    SparseDataFrame(mapping)
11471

115-
def setup(self):
116-
self.s = pd.Series(([np.nan] * 10000))
117-
self.s[0] = 3.0
118-
self.s[100] = (-1.0)
119-
self.s[999] = 12.1
120-
self.s.index = pd.MultiIndex.from_product((range(10), range(10), range(10), range(10)))
121-
self.ss = self.s.to_sparse()
122-
123-
def time_sparse_series_to_coo(self):
124-
self.ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)
12572

73+
class FromCoo(object):
12674

127-
class sparse_arithmetic_int(object):
12875
goal_time = 0.2
12976

13077
def setup(self):
131-
np.random.seed(1)
132-
self.a_10percent = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=np.nan)
133-
self.b_10percent = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=np.nan)
134-
135-
self.a_10percent_zero = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=0)
136-
self.b_10percent_zero = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=0)
137-
138-
self.a_1percent = self.make_sparse_array(length=1000000, dense_size=10000, fill_value=np.nan)
139-
self.b_1percent = self.make_sparse_array(length=1000000, dense_size=10000, fill_value=np.nan)
140-
141-
def make_sparse_array(self, length, dense_size, fill_value):
142-
arr = np.array([fill_value] * length, dtype=np.float64)
143-
indexer = np.unique(np.random.randint(0, length, dense_size))
144-
arr[indexer] = np.random.randint(0, 100, len(indexer))
145-
return pd.SparseArray(arr, fill_value=fill_value)
146-
147-
def time_sparse_make_union(self):
148-
self.a_10percent.sp_index.make_union(self.b_10percent.sp_index)
78+
self.matrix = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0],
79+
([1, 0, 0], [0, 2, 3])),
80+
shape=(100, 100))
14981

150-
def time_sparse_intersect(self):
151-
self.a_10percent.sp_index.intersect(self.b_10percent.sp_index)
152-
153-
def time_sparse_addition_10percent(self):
154-
self.a_10percent + self.b_10percent
82+
def time_sparse_series_from_coo(self):
    """Time ``SparseSeries.from_coo`` on the matrix in ``self.matrix``."""
    coo = self.matrix
    SparseSeries.from_coo(coo)
15584

156-
def time_sparse_addition_10percent_zero(self):
157-
self.a_10percent_zero + self.b_10percent_zero
15885

159-
def time_sparse_addition_1percent(self):
160-
self.a_1percent + self.b_1percent
86+
class ToCoo(object):
16187

162-
def time_sparse_division_10percent(self):
163-
self.a_10percent / self.b_10percent
88+
goal_time = 0.2
16489

165-
def time_sparse_division_10percent_zero(self):
166-
self.a_10percent_zero / self.b_10percent_zero
90+
def setup(self):
    """Build a mostly-NaN sparse series on a four-level ``MultiIndex``.

    Of the 10000 positions only 0, 100 and 999 hold real values.  The
    index is the product of four ``range(10)`` levels, and the result of
    ``to_sparse()`` is stored on ``self.ss``.
    """
    values = np.full(10000, np.nan)
    values[[0, 100, 999]] = [3.0, -1.0, 12.1]
    levels = [range(10)] * 4
    dense = Series(values, index=MultiIndex.from_product(levels))
    self.ss = dense.to_sparse()
16797

168-
def time_sparse_division_1percent(self):
169-
self.a_1percent / self.b_1percent
98+
def time_sparse_series_to_coo(self):
    """Time ``to_coo`` with levels 0-1 as rows and levels 2-3 as columns."""
    options = dict(row_levels=[0, 1],
                   column_levels=[2, 3],
                   sort_labels=True)
    self.ss.to_coo(**options)
170102

171103

104+
class Arithmetic(object):
172105

173-
class sparse_arithmetic_block(object):
174106
goal_time = 0.2
107+
params = ([0.1, 0.01], [0, np.nan])
108+
param_names = ['dense_proportion', 'fill_value']
175109

176-
def setup(self):
177-
np.random.seed(1)
178-
self.a = self.make_sparse_array(length=1000000, num_blocks=1000,
179-
block_size=10, fill_value=np.nan)
180-
self.b = self.make_sparse_array(length=1000000, num_blocks=1000,
181-
block_size=10, fill_value=np.nan)
182-
183-
self.a_zero = self.make_sparse_array(length=1000000, num_blocks=1000,
184-
block_size=10, fill_value=0)
185-
self.b_zero = self.make_sparse_array(length=1000000, num_blocks=1000,
186-
block_size=10, fill_value=np.nan)
110+
def setup(self, dense_proportion, fill_value):
    """Create the two sparse operands used by the timing methods.

    Each operand is a one-million-element ``SparseArray`` built from an
    independent ``make_array`` draw; they are stored on ``self.array1``
    and ``self.array2``.
    """
    n_elements = 10**6

    def build_operand():
        # NOTE(review): the dense data is int64 even when fill_value is
        # NaN -- confirm that combination yields the intended sparsity.
        dense = make_array(n_elements, dense_proportion, fill_value,
                           np.int64)
        return SparseArray(dense, fill_value=fill_value)

    self.array1 = build_operand()
    self.array2 = build_operand()
187116

188-
def make_sparse_array(self, length, num_blocks, block_size, fill_value):
189-
a = np.array([fill_value] * length)
190-
for block in range(num_blocks):
191-
i = np.random.randint(0, length)
192-
a[i:i + block_size] = np.random.randint(0, 100, len(a[i:i + block_size]))
193-
return pd.SparseArray(a, fill_value=fill_value)
117+
def time_make_union(self, dense_proportion, fill_value):
    """Time ``make_union`` of ``array1``'s sparse index with ``array2``'s."""
    left_index = self.array1.sp_index
    right_index = self.array2.sp_index
    left_index.make_union(right_index)
194119

195-
def time_sparse_make_union(self):
196-
self.a.sp_index.make_union(self.b.sp_index)
120+
def time_intersect(self, dense_proportion, fill_value):
    """Time ``intersect`` of ``array1``'s sparse index with ``array2``'s."""
    left_index = self.array1.sp_index
    right_index = self.array2.sp_index
    left_index.intersect(right_index)
197122

198-
def time_sparse_intersect(self):
199-
self.a.sp_index.intersect(self.b.sp_index)
123+
def time_add(self, dense_proportion, fill_value):
    """Time element-wise addition of the two sparse operands."""
    left, right = self.array1, self.array2
    left + right
200125

201-
def time_sparse_addition(self):
202-
self.a + self.b
126+
def time_divide(self, dense_proportion, fill_value):
    """Time element-wise division of ``array1`` by ``array2``."""
    numerator, denominator = self.array1, self.array2
    numerator / denominator
203128

204-
def time_sparse_addition_zero(self):
205-
self.a_zero + self.b_zero
206129

207-
def time_sparse_division(self):
208-
self.a / self.b
130+
class ArithmeticBlock(object):
209131

210-
def time_sparse_division_zero(self):
211-
self.a_zero / self.b_zero
132+
goal_time = 0.2
133+
params = [np.nan, 0]
134+
param_names = ['fill_value']
135+
136+
def setup(self, fill_value):
137+
N = 10**6
138+
self.arr1 = self.make_block_array(length=N, num_blocks=1000,
139+
block_size=10, fill_value=fill_value)
140+
self.arr2 = self.make_block_array(length=N, num_blocks=1000,
141+
block_size=10, fill_value=fill_value)
142+
143+
def make_block_array(self, length, num_blocks, block_size, fill_value):
    """Return a ``SparseArray`` whose non-fill values sit in aligned blocks.

    Parameters
    ----------
    length : int
        Number of elements in the array.
    num_blocks : int
        How many blocks to fill; block starts are sampled without
        replacement from the multiples of ``block_size`` below ``length``.
    block_size : int
        Number of consecutive elements in each block.
    fill_value : scalar
        Value of every position outside the blocks, and the
        ``fill_value`` of the returned ``SparseArray``.

    Returns
    -------
    SparseArray
    """
    arr = np.full(length, fill_value)
    starts = np.random.choice(np.arange(0, length, block_size),
                              num_blocks,
                              replace=False)
    for start in starts:
        # Basic slicing returns a view, so writing to ``block`` fills arr.
        block = arr[start:start + block_size]
        # The final block is shorter than block_size when length is not a
        # multiple of it; size the draw to the slice so the assignment
        # always broadcasts.
        block[:] = np.random.randint(0, 100, len(block))
    return SparseArray(arr, fill_value=fill_value)
151+
152+
def time_make_union(self, fill_value):
    """Time ``make_union`` of ``arr1``'s sparse index with ``arr2``'s."""
    left_index = self.arr1.sp_index
    right_index = self.arr2.sp_index
    left_index.make_union(right_index)
154+
155+
def time_intersect(self, fill_value):
    """Time ``intersect`` of ``arr1``'s sparse index with ``arr2``'s."""
    # Use two different operands, as the other timing methods in this
    # class do; intersecting an index with itself would not measure the
    # two-array case.
    self.arr1.sp_index.intersect(self.arr2.sp_index)
157+
158+
def time_addition(self, fill_value):
    """Time element-wise addition of the two block-structured operands."""
    left, right = self.arr1, self.arr2
    left + right
160+
161+
def time_division(self, fill_value):
    """Time element-wise division of ``arr1`` by ``arr2``."""
    numerator, denominator = self.arr1, self.arr2
    numerator / denominator

0 commit comments

Comments
 (0)