|
3 | 3 | from pandas.types.concat import union_categoricals |
4 | 4 | except ImportError: |
5 | 5 | pass |
6 | | -import string |
7 | 6 |
|
8 | 7 |
|
9 | | -class concat_categorical(object): |
| 8 | +class Categoricals(object): |
10 | 9 | goal_time = 0.2 |
11 | 10 |
|
12 | 11 | def setup(self): |
13 | | - self.s = pd.Series((list('aabbcd') * 1000000)).astype('category') |
| 12 | + N = 100000 |
| 13 | + self.s = pd.Series((list('aabbcd') * N)).astype('category') |
14 | 14 |
|
15 | | - def time_concat_categorical(self): |
16 | | - concat([self.s, self.s]) |
| 15 | + self.a = pd.Categorical((list('aabbcd') * N)) |
| 16 | + self.b = pd.Categorical((list('bbcdjk') * N)) |
17 | 17 |
|
| 18 | + self.categories = list('abcde') |
| 19 | + self.cat_idx = Index(self.categories) |
| 20 | + self.values = np.tile(self.categories, N) |
| 21 | + self.codes = np.tile(range(len(self.categories)), N) |
18 | 22 |
|
19 | | -class union_categorical(object): |
20 | | - goal_time = 0.2 |
| 23 | + self.datetimes = pd.Series(pd.date_range( |
| 24 | + '1995-01-01 00:00:00', periods=10000, freq='s')) |
21 | 25 |
|
22 | | - def setup(self): |
23 | | - self.a = pd.Categorical((list('aabbcd') * 1000000)) |
24 | | - self.b = pd.Categorical((list('bbcdjk') * 1000000)) |
| 26 | + def time_concat(self): |
| 27 | + concat([self.s, self.s]) |
25 | 28 |
|
26 | | - def time_union_categorical(self): |
| 29 | + def time_union(self): |
27 | 30 | union_categoricals([self.a, self.b]) |
28 | 31 |
|
29 | | - |
30 | | -class categorical_value_counts(object): |
31 | | - goal_time = 1 |
32 | | - |
33 | | - def setup(self): |
34 | | - n = 500000 |
35 | | - np.random.seed(2718281) |
36 | | - arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
37 | | - self.ts = Series(arr).astype('category') |
38 | | - |
39 | | - def time_value_counts(self): |
40 | | - self.ts.value_counts(dropna=False) |
41 | | - |
42 | | - def time_value_counts_dropna(self): |
43 | | - self.ts.value_counts(dropna=True) |
44 | | - |
45 | | - |
46 | | -class categorical_constructor(object): |
47 | | - goal_time = 0.2 |
48 | | - |
49 | | - def setup(self): |
50 | | - n = 5 |
51 | | - N = 1e6 |
52 | | - self.categories = list(string.ascii_letters[:n]) |
53 | | - self.cat_idx = Index(self.categories) |
54 | | - self.values = np.tile(self.categories, N) |
55 | | - self.codes = np.tile(range(n), N) |
56 | | - |
57 | | - def time_regular_constructor(self): |
| 32 | + def time_constructor_regular(self): |
58 | 33 | Categorical(self.values, self.categories) |
59 | 34 |
|
60 | | - def time_fastpath(self): |
| 35 | + def time_constructor_fastpath(self): |
61 | 36 | Categorical(self.codes, self.cat_idx, fastpath=True) |
62 | 37 |
|
63 | | - |
64 | | -class categorical_constructor_with_datetimes(object): |
65 | | - goal_time = 0.2 |
66 | | - |
67 | | - def setup(self): |
68 | | - self.datetimes = pd.Series(pd.date_range( |
69 | | - '1995-01-01 00:00:00', periods=10000, freq='s')) |
70 | | - |
71 | | - def time_datetimes(self): |
| 38 | + def time_constructor_datetimes(self): |
72 | 39 | Categorical(self.datetimes) |
73 | 40 |
|
74 | | - def time_datetimes_with_nat(self): |
| 41 | + def time_constructor_datetimes_with_nat(self): |
75 | 42 | t = self.datetimes |
76 | 43 | t.iloc[-1] = pd.NaT |
77 | 44 | Categorical(t) |
78 | 45 |
|
79 | 46 |
|
80 | | -class categorical_rendering(object): |
81 | | - goal_time = 3e-3 |
| 47 | +class Categoricals2(object): |
| 48 | + goal_time = 0.2 |
82 | 49 |
|
83 | 50 | def setup(self): |
84 | | - n = 1000 |
85 | | - items = [str(i) for i in range(n)] |
86 | | - s = pd.Series(items, dtype='category') |
87 | | - df = pd.DataFrame({'C': s, 'data': np.random.randn(n)}) |
88 | | - self.data = df[df.C == '20'] |
| 51 | + n = 500000 |
| 52 | + np.random.seed(2718281) |
| 53 | + arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
| 54 | + self.ts = Series(arr).astype('category') |
| 55 | + |
| 56 | + self.sel = self.ts.loc[[0]] |
| 57 | + |
| 58 | + def time_value_counts(self): |
| 59 | + self.ts.value_counts(dropna=False) |
| 60 | + |
| 61 | + def time_value_counts_dropna(self): |
| 62 | + self.ts.value_counts(dropna=True) |
89 | 63 |
|
90 | 64 | def time_rendering(self): |
91 | | - str(self.data.C) |
| 65 | + str(self.sel) |
0 commit comments