File tree 1 file changed +15
-15
lines changed 1 file changed +15
-15
lines changed Original file line number Diff line number Diff line change @@ -472,31 +472,31 @@ def str_get_dummies(arr, sep='|'):
472
472
2 1 0 1
473
473
474
474
>>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
475
- a b c
476
- 0 1 1 0
477
- 1 NaN NaN NaN
478
- 2 1 0 1
475
+ a b c
476
+ 0 1 1 0
477
+ 1 0 0 0
478
+ 2 1 0 1
479
479
480
480
See also ``pd.get_dummies``.
481
481
482
482
"""
483
- def na_setunion (x , y ):
484
- try :
485
- return x .union (y )
486
- except TypeError :
487
- return x
488
-
489
483
# TODO remove this hack?
490
- arr = sep + arr .fillna ('' ).astype (str ) + sep
484
+ arr = arr .fillna ('' )
485
+ try :
486
+ arr = sep + arr + sep
487
+ except TypeError :
488
+ arr = sep + arr .astype (str ) + sep
489
+
490
+ tags = set ()
491
+ for ts in arr .str .split (sep ):
492
+ tags .update (ts )
493
+ tags = sorted (tags - set (["" ]))
491
494
492
- from functools import reduce
493
- tags = sorted (reduce (na_setunion , arr .str .split (sep ), set ())
494
- - set (['' ]))
495
495
dummies = np .empty ((len (arr ), len (tags )), dtype = int )
496
496
497
497
for i , t in enumerate (tags ):
498
498
pat = sep + t + sep
499
- dummies [:, i ] = _na_map ( lambda x : pat in x , arr )
499
+ dummies [:, i ] = lib . map_infer ( arr . values , lambda x : pat in x )
500
500
return DataFrame (dummies , arr .index , tags )
501
501
502
502
You can’t perform that action at this time.
0 commit comments