11import warnings
22
33import numpy as np
4- from pandas import Series
4+ from pandas import Series , DataFrame
55import pandas .util .testing as tm
66
77
@@ -12,9 +12,6 @@ class Methods(object):
def setup(self):
    """Create the 10**5-element string Series the timing methods read."""
    n_strings = 10 ** 5
    labels = tm.makeStringIndex(n_strings)
    self.s = Series(labels)
1414
15- def time_cat (self ):
16- self .s .str .cat (sep = ',' )
17-
def time_center(self):
    """Time ``str.center`` with a target width of 100 on ``self.s``."""
    string_methods = self.s.str
    string_methods.center(100)
2017
@@ -87,6 +84,32 @@ def time_repeat(self, repeats):
8784 self .s .str .repeat (self .repeat )
8885
8986
class Cat(object):
    """Benchmark ``Series.str.cat`` with and without extra columns and NaNs.

    The benchmark is parameterized over the number of additional columns
    passed as ``others``, the separator, the NaN replacement string, and
    the fraction of missing values placed in every column.
    """

    goal_time = 0.2
    params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15])
    param_names = ['other_cols', 'sep', 'na_rep', 'na_frac']

    def setup(self, other_cols, sep, na_rep, na_frac):
        """Build the calling Series and the optional ``others`` frame."""
        n_rows = 10 ** 5
        keep_probs = [1 - na_frac, na_frac]

        def draw_mask():
            # True keeps the string; False (probability na_frac) is the
            # position that ``where`` blanks out.
            return np.random.choice([True, False], n_rows, p=keep_probs)

        caller = Series(tm.makeStringIndex(n_rows))
        self.s = caller.where(draw_mask())

        if other_cols != 0:
            columns = {}
            for col in range(other_cols):
                labels = tm.makeStringIndex(n_rows)
                columns[col] = labels.where(draw_mask())
            self.others = DataFrame(columns)
        else:
            # str.cat self-concatenates only for others=None
            self.others = None

    def time_cat(self, other_cols, sep, na_rep, na_frac):
        """Time one ``str.cat`` call on the data prepared by ``setup``."""
        # The caller and each of the other_cols columns get their own mask,
        # so before concatenation the expected fraction of rows containing
        # at least one NaN is:
        #     reduce(lambda t, _: t + (1 - t) * na_frac,
        #            range(other_cols + 1), 0)
        # which works out to ~48% for other_cols=3 and na_frac=0.15.
        accessor = self.s.str
        accessor.cat(others=self.others, sep=sep, na_rep=na_rep)
111+
112+
90113class Contains (object ):
91114
92115 goal_time = 0.2
0 commit comments