@@ -445,6 +445,23 @@ def _selection_list(self):
445445 return [self ._selection ]
446446 return self ._selection
447447
448+ @cache_readonly
449+ def _selected_obj (self ):
450+
451+ if self ._selection is None or isinstance (self .obj , Series ):
452+ return self .obj
453+ else :
454+ return self .obj [self ._selection ]
455+
456+ def _set_selection_from_grouper (self ):
457+ """ we may need create a selection if we have non-level groupers """
458+ grp = self .grouper
459+ if self ._selection is None and self .as_index and getattr (grp ,'groupings' ,None ) is not None :
460+ ax = self .obj ._info_axis
461+ groupers = [ g .name for g in grp .groupings if g .level is None and g .name is not None and g .name in ax ]
462+ if len (groupers ):
463+ self ._selection = (ax - Index (groupers )).tolist ()
464+
448465 def _local_dir (self ):
449466 return sorted (set (self .obj ._local_dir () + list (self ._apply_whitelist )))
450467
@@ -453,7 +470,6 @@ def __getattr__(self, attr):
453470 return object .__getattribute__ (self , attr )
454471 if attr in self .obj :
455472 return self [attr ]
456-
457473 if hasattr (self .obj , attr ):
458474 return self ._make_wrapper (attr )
459475
@@ -472,6 +488,10 @@ def _make_wrapper(self, name):
472488 type (self ).__name__ ))
473489 raise AttributeError (msg )
474490
491+ # need to setup the selection
492+ # as are not passed directly but in the grouper
493+ self ._set_selection_from_grouper ()
494+
475495 f = getattr (self ._selected_obj , name )
476496 if not isinstance (f , types .MethodType ):
477497 return self .apply (lambda self : getattr (self , name ))
@@ -503,7 +523,19 @@ def curried(x):
503523 try :
504524 return self .apply (curried_with_axis )
505525 except Exception :
506- return self .apply (curried )
526+ try :
527+ return self .apply (curried )
528+ except Exception :
529+
530+ # related to : GH3688
531+ # try item-by-item
532+ # this can be called recursively, so we need to raise ValueError if
533+ # we don't have this method, to indicate to aggregate that it should
534+ # mark this column as an error
535+ try :
536+ return self ._aggregate_item_by_item (name , * args , ** kwargs )
537+ except (AttributeError ):
538+ raise ValueError
507539
508540 return wrapper
509541
@@ -624,6 +656,7 @@ def mean(self):
624656 except GroupByError :
625657 raise
626658 except Exception : # pragma: no cover
659+ self ._set_selection_from_grouper ()
627660 f = lambda x : x .mean (axis = self .axis )
628661 return self ._python_agg_general (f )
629662
@@ -639,6 +672,7 @@ def median(self):
639672 raise
640673 except Exception : # pragma: no cover
641674
675+ self ._set_selection_from_grouper ()
642676 def f (x ):
643677 if isinstance (x , np .ndarray ):
644678 x = Series (x )
@@ -655,6 +689,7 @@ def std(self, ddof=1):
655689 if ddof == 1 :
656690 return self ._cython_agg_general ('std' )
657691 else :
692+ self ._set_selection_from_grouper ()
658693 f = lambda x : x .std (ddof = ddof )
659694 return self ._python_agg_general (f )
660695
@@ -667,15 +702,26 @@ def var(self, ddof=1):
667702 if ddof == 1 :
668703 return self ._cython_agg_general ('var' )
669704 else :
705+ self ._set_selection_from_grouper ()
670706 f = lambda x : x .var (ddof = ddof )
671707 return self ._python_agg_general (f )
672708
def size(self):
    """
    Compute group sizes

    Returns
    -------
    Whatever ``self.grouper.size()`` returns; the work is delegated
    entirely to the grouper.
    """
    group_sizes = self.grouper.size()
    return group_sizes
678715
def count(self, axis=0):
    """
    Number of non-null items in each group.

    Parameters
    ----------
    axis : axis number, default 0
        the grouping axis

    Returns
    -------
    The result of ``_python_agg_general`` applied with a non-null counter,
    cast to ``int64``.
    """
    # the selection may have to be derived from the grouper first
    self._set_selection_from_grouper()

    def count_notnull(values):
        # sum the boolean not-null mask along the requested axis
        return notnull(values).sum(axis=axis)

    counts = self._python_agg_general(count_notnull)
    return counts.astype('int64')
724+
679725 sum = _groupby_function ('sum' , 'add' , np .sum )
680726 prod = _groupby_function ('prod' , 'prod' , np .prod )
681727 min = _groupby_function ('min' , 'min' , np .min , numeric_only = False )
@@ -685,14 +731,14 @@ def size(self):
685731 last = _groupby_function ('last' , 'last' , _last_compat , numeric_only = False ,
686732 _convert = True )
687733
734+
def ohlc(self):
    """
    Aggregate with the Cython 'ohlc' kernel, excluding missing values

    NOTE(review): presumably open/high/low/close per group; the earlier
    "sum of values" summary looked copy-pasted - confirm.

    For multiple groupings, the result index will be a MultiIndex
    """
    def ohlc_of(column_groupby):
        return column_groupby._cython_agg_general('ohlc')

    # dispatched through _apply_to_column_groupbys rather than called
    # directly on self
    return self._apply_to_column_groupbys(ohlc_of)
696742
697743 def nth (self , n , dropna = None ):
698744 """
@@ -888,13 +934,6 @@ def _cumcount_array(self, arr=None, **kwargs):
888934 cumcounts [v ] = arr [len (v )- 1 ::- 1 ]
889935 return cumcounts
890936
891- @cache_readonly
892- def _selected_obj (self ):
893- if self ._selection is None or isinstance (self .obj , Series ):
894- return self .obj
895- else :
896- return self .obj [self ._selection ]
897-
898937 def _index_with_as_index (self , b ):
899938 """
900939 Take boolean mask of index to be returned from apply, if as_index=True
@@ -990,12 +1029,23 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
9901029 result = result .reindex (ax )
9911030 else :
9921031 result = result .reindex_axis (ax , axis = self .axis )
993- elif self .group_keys and self .as_index :
994- group_keys = keys
995- group_levels = self .grouper .levels
996- group_names = self .grouper .names
997- result = concat (values , axis = self .axis , keys = group_keys ,
998- levels = group_levels , names = group_names )
1032+
1033+ elif self .group_keys :
1034+
1035+ if self .as_index :
1036+
1037+ # possible MI return case
1038+ group_keys = keys
1039+ group_levels = self .grouper .levels
1040+ group_names = self .grouper .names
1041+ result = concat (values , axis = self .axis , keys = group_keys ,
1042+ levels = group_levels , names = group_names )
1043+ else :
1044+
1045+ # GH5610, returns a MI, with the first level being a
1046+ # range index
1047+ keys = list (range (len (values )))
1048+ result = concat (values , axis = self .axis , keys = keys )
9991049 else :
10001050 result = concat (values , axis = self .axis )
10011051
@@ -2187,6 +2237,9 @@ def true_and_notnull(x, *args, **kwargs):
21872237 filtered = self ._apply_filter (indices , dropna )
21882238 return filtered
21892239
2240+ def _apply_to_column_groupbys (self , func ):
2241+ """ return a pass thru """
2242+ return func (self )
21902243
21912244class NDFrameGroupBy (GroupBy ):
21922245
@@ -2486,6 +2539,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
24862539 elif hasattr (self .grouper , 'groupings' ):
24872540 if len (self .grouper .groupings ) > 1 :
24882541 key_index = MultiIndex .from_tuples (keys , names = key_names )
2542+
24892543 else :
24902544 ping = self .grouper .groupings [0 ]
24912545 if len (keys ) == ping .ngroups :
@@ -2498,8 +2552,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
24982552 # reorder the values
24992553 values = [values [i ] for i in indexer ]
25002554 else :
2555+
25012556 key_index = Index (keys , name = key_names [0 ])
25022557
2558+ # don't use the key indexer
2559+ if not self .as_index :
2560+ key_index = None
2561+
25032562 # make Nones an empty object
25042563 if com ._count_not_none (* values ) != len (values ):
25052564 v = next (v for v in values if v is not None )
@@ -2569,7 +2628,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
25692628
25702629 # normally use vstack as its faster than concat
25712630 # and if we have mi-columns
2572- if not _np_version_under1p7 or isinstance (v .index ,MultiIndex ):
2631+ if not _np_version_under1p7 or isinstance (v .index ,MultiIndex ) or key_index is None :
25732632 stacked_values = np .vstack ([np .asarray (x ) for x in values ])
25742633 result = DataFrame (stacked_values ,index = key_index ,columns = index )
25752634 else :
@@ -2889,16 +2948,6 @@ def _apply_to_column_groupbys(self, func):
28892948 in self ._iterate_column_groupbys ()),
28902949 keys = self ._selected_obj .columns , axis = 1 )
28912950
2892- def ohlc (self ):
2893- """
2894- Compute sum of values, excluding missing values
2895-
2896- For multiple groupings, the result index will be a MultiIndex
2897- """
2898- return self ._apply_to_column_groupbys (
2899- lambda x : x ._cython_agg_general ('ohlc' ))
2900-
2901-
29022951from pandas .tools .plotting import boxplot_frame_groupby
29032952DataFrameGroupBy .boxplot = boxplot_frame_groupby
29042953
0 commit comments