File tree Expand file tree Collapse file tree 1 file changed +15
-15
lines changed
Expand file tree Collapse file tree 1 file changed +15
-15
lines changed Original file line number Diff line number Diff line change @@ -472,31 +472,31 @@ def str_get_dummies(arr, sep='|'):
472472 2 1 0 1
473473
474474 >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
475- a b c
476- 0 1 1 0
477- 1 NaN NaN NaN
478- 2 1 0 1
475+ a b c
476+ 0 1 1 0
477+ 1 0 0 0
478+ 2 1 0 1
479479
480480 See also ``pd.get_dummies``.
481481
482482 """
483- def na_setunion (x , y ):
484- try :
485- return x .union (y )
486- except TypeError :
487- return x
488-
489483 # TODO remove this hack?
490- arr = sep + arr .fillna ('' ).astype (str ) + sep
484+ arr = arr .fillna ('' )
485+ try :
486+ arr = sep + arr + sep
487+ except TypeError :
488+ arr = sep + arr .astype (str ) + sep
489+
490+ tags = set ()
491+ for ts in arr .str .split (sep ):
492+ tags .update (ts )
493+ tags = sorted (tags - set (["" ]))
491494
492- from functools import reduce
493- tags = sorted (reduce (na_setunion , arr .str .split (sep ), set ())
494- - set (['' ]))
495495 dummies = np .empty ((len (arr ), len (tags )), dtype = int )
496496
497497 for i , t in enumerate (tags ):
498498 pat = sep + t + sep
499- dummies [:, i ] = _na_map ( lambda x : pat in x , arr )
499+ dummies [:, i ] = lib . map_infer ( arr . values , lambda x : pat in x )
500500 return DataFrame (dummies , arr .index , tags )
501501
502502
You can’t perform that action at this time.
0 commit comments