@@ -821,12 +821,13 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
821821 from pandas .core .reshape .concat import concat
822822 from itertools import cycle
823823
824+ dtypes_to_encode = ['object' , 'category' ]
825+
824826 if isinstance (data , DataFrame ):
825827 # determine columns being encoded
826-
827828 if columns is None :
828829 data_to_encode = data .select_dtypes (
829- include = [ 'object' , 'category' ] )
830+ include = dtypes_to_encode )
830831 else :
831832 data_to_encode = data [columns ]
832833
@@ -844,6 +845,7 @@ def check_len(item, name):
844845
845846 check_len (prefix , 'prefix' )
846847 check_len (prefix_sep , 'prefix_sep' )
848+
847849 if isinstance (prefix , compat .string_types ):
848850 prefix = cycle ([prefix ])
849851 if isinstance (prefix , dict ):
@@ -859,15 +861,20 @@ def check_len(item, name):
859861 prefix_sep = [prefix_sep [col ] for col in data_to_encode .columns ]
860862
861863 if data_to_encode .shape == data .shape :
864+ # Encoding the entire df, so there are no remaining columns to prepend
862865 with_dummies = []
863866 elif columns is not None :
867+ # Encoding only cols specified in columns. Get all cols not in
868+ # columns to prepend to result.
864869 with_dummies = [data .drop (columns , axis = 1 )]
865870 else :
866- with_dummies = [data .select_dtypes (exclude = ['object' , 'category' ])]
871+ # Encoding only object and category dtype columns. Get remaining
872+ # columns to prepend to result.
873+ with_dummies = [data .select_dtypes (exclude = dtypes_to_encode )]
867874
868875 for (col , pre , sep ) in zip (data_to_encode .iteritems (), prefix ,
869876 prefix_sep ):
870-
877+ # col is (column_name, column), use just column data here
871878 dummy = _get_dummies_1d (col [1 ], prefix = pre , prefix_sep = sep ,
872879 dummy_na = dummy_na , sparse = sparse ,
873880 drop_first = drop_first , dtype = dtype )
0 commit comments