@@ -102,9 +102,80 @@ def f(g):
102102 group_keys = grouper ._get_group_keys ()
103103
104104 values , mutated = splitter .fast_apply (f , group_keys )
105+
105106 assert not mutated
106107
107108
@pytest.mark.parametrize(
    "df, group_names",
    [
        (
            DataFrame({"a": [1, 1, 1, 2, 3], "b": ["a", "a", "a", "b", "c"]}),
            [1, 2, 3],
        ),
        (
            DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}),
            [0, 1],
        ),
        (
            DataFrame({"a": [1]}),
            [1],
        ),
        (
            DataFrame({"a": [1, 1, 1, 2, 2, 1, 1, 2], "b": range(8)}),
            [1, 2],
        ),
        (
            DataFrame({"a": [1, 2, 3, 1, 2, 3], "two": [4, 5, 6, 7, 8, 9]}),
            [1, 2, 3],
        ),
        (
            DataFrame(
                {
                    "a": list("aaabbbcccc"),
                    "B": [3, 4, 3, 6, 5, 2, 1, 9, 5, 4],
                    "C": [4, 0, 2, 2, 2, 7, 8, 6, 2, 8],
                }
            ),
            ["a", "b", "c"],
        ),
        (
            DataFrame([[1, 2, 3], [2, 2, 3]], columns=["a", "b", "c"]),
            [1, 2],
        ),
    ],
    ids=[
        "GH2936",
        "GH7739 & GH10519",
        "GH10519",
        "GH2656",
        "GH12155",
        "GH20084",
        "GH21417",
    ],
)
def test_group_apply_once_per_group(df, group_names):
    """Check that ``groupby("a").apply`` calls the function once per group.

    Regression test for GH2936, GH7739, GH10519, GH2656, GH12155, GH20084
    and GH21417.

    Every helper below records ``group.name`` as a side effect before
    returning.  After each ``apply`` call the recorded names must equal
    ``group_names`` exactly, so a function that is evaluated a second time
    on any group (historically the first group was evaluated twice to check
    whether the Cython index slider was safe to use) makes the test fail.
    """
    # The helpers close over this list to expose their side effect, which is
    # why the applied functions cannot be supplied through parametrization.
    visited = []

    def returns_copy(group):
        # Returning a copy takes the fast apply path.
        visited.append(group.name)
        return group.copy()

    def returns_same_object(group):
        # Returning the group itself takes the slow apply path.
        visited.append(group.name)
        return group

    def returns_scalar(group):
        # GH7739, GH2656
        visited.append(group.name)
        return 0

    def returns_none(group):
        # GH10519, GH12155, GH21417
        visited.append(group.name)
        return None

    def returns_constant_frame(group):
        # GH2936, GH20084
        visited.append(group.name)
        return DataFrame({"a": [1], "b": [1]})

    candidates = (
        returns_copy,
        returns_same_object,
        returns_scalar,
        returns_none,
        returns_constant_frame,
    )

    for candidate in candidates:
        # Empty the shared recorder in place so the closures keep seeing it.
        visited[:] = []

        grouped = df.groupby("a")
        grouped.apply(candidate)
        assert visited == group_names
177+
178+
108179def test_apply_with_mixed_dtype ():
109180 # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
110181 df = DataFrame ({'foo1' : np .random .randn (6 ),
0 commit comments