@@ -1308,3 +1308,196 @@ def test_null_group_lambda_self(sort, dropna):
13081308 gb = df .groupby ("A" , dropna = dropna , sort = sort )
13091309 result = gb .transform (lambda x : x )
13101310 tm .assert_frame_equal (result , expected )
1311+
1312+
def test_null_group_str_reducer(request, dropna, reduction_func):
    """Check ``transform`` with a string reducer when the key column has nulls.

    The expected result is first built as if ``dropna=False`` (every row
    receives its group's reduced value).  When ``dropna`` is true, the two
    rows whose key is NaN are then overwritten with NaN.
    """
    # GH 17093
    if reduction_func in ("corrwith", "ngroup"):
        request.node.add_marker(pytest.mark.xfail(reason="incorrectly raises"))

    # A non-default index lets the test confirm that transform preserves it
    index = [1, 2, 3, 4]
    df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index)
    gb = df.groupby("A", dropna=dropna)

    # Reducers that need a positional argument; everything else takes none
    args = {"corrwith": (df["B"],), "nth": (0,)}.get(reduction_func, ())

    # Reducers that don't fit the generic pattern get hand-written values
    manual_frame_values = {
        "first": [1, 1, 2, 2],
        "last": [2, 2, 3, 3],
        "nth": [1, 1, 2, 2],
        "corrwith": [1.0, 1.0, 1.0, 1.0],
    }
    if reduction_func == "size":
        # size is the one hand-written case whose expectation is a Series
        expected = Series([2, 2, 2, 2], index=index)
    elif reduction_func in manual_frame_values:
        expected = DataFrame({"B": manual_frame_values[reduction_func]}, index=index)
    else:
        # Generic pattern: reduce each group (nulls kept) and broadcast the
        # scalar over that group's rows
        pieces = [
            Series(getattr(group["B"], reduction_func)(), index=group.index)
            for _, group in df.groupby("A", dropna=False)
        ]
        expected = concat(pieces).to_frame("B")

    if dropna:
        # Rows at positions 2 and 3 belong to the null group
        null_positions = [2, 3]
        target_dtype = object if reduction_func in ("any", "all") else float
        expected = expected.astype(target_dtype)
        if expected.ndim == 1:
            expected.iloc[null_positions] = np.nan
        else:
            expected.iloc[null_positions, 0] = np.nan

    result = gb.transform(reduction_func, *args)
    tm.assert_equal(result, expected)
1358+
1359+
def test_null_group_str_transformer(
    request, using_array_manager, dropna, transformation_func
):
    """Check ``transform`` with a string transformer when the key has a null.

    The expectation is assembled by applying the transformer to each group
    separately and concatenating the pieces.  When ``dropna`` is true, a
    single NaN row at index label 3 is appended to the pieces.
    """
    # GH 17093
    xfails_block = (
        "cummax",
        "cummin",
        "cumsum",
        "fillna",
        "rank",
        "backfill",
        "ffill",
        "bfill",
        "pad",
    )
    xfails_array = ("cummax", "cummin", "cumsum", "fillna", "rank")
    # The set of known-bad transformers depends on the manager in use
    known_bad = xfails_array if using_array_manager else xfails_block

    if transformation_func == "tshift":
        request.node.add_marker(
            pytest.mark.xfail(reason="tshift requires timeseries")
        )
    elif dropna and transformation_func in known_bad:
        request.node.add_marker(
            pytest.mark.xfail(
                reason="produces incorrect results when nans are present"
            )
        )

    args = (0,) if transformation_func == "fillna" else ()
    df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3])
    gb = df.groupby("A", dropna=dropna)

    def _expected_piece(position, group):
        # Build the expected transform output for one group
        if transformation_func == "cumcount":
            # DataFrame has no cumcount method
            return DataFrame({"B": range(len(group))}, index=group.index)
        if transformation_func == "ngroup":
            return DataFrame([position] * len(group), index=group.index, columns=["B"])
        return getattr(group[["B"]], transformation_func)(*args)

    pieces = [_expected_piece(k, group) for k, (_, group) in enumerate(gb)]
    if dropna:
        null_dtype = object if transformation_func in ("any", "all") else None
        null_row = DataFrame([[np.nan]], index=[3], dtype=null_dtype, columns=["B"])
        pieces.append(null_row)
    expected = concat(pieces)

    if transformation_func in ("cumcount", "ngroup"):
        # ngroup/cumcount always returns a Series as it counts the groups, not values
        expected = expected["B"].rename(None)

    if transformation_func in ("backfill", "pad"):
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(
        warn, match=f"{transformation_func} is deprecated"
    ):
        result = gb.transform(transformation_func, *args)

    tm.assert_equal(result, expected)
1414+
1415+
def test_null_group_str_reducer_series(request, dropna, reduction_func):
    """Series variant: ``transform`` with a string reducer and null keys.

    The expected result is first built as if ``dropna=False``.  When
    ``dropna`` is true, the two rows grouped under NaN are then overwritten
    with NaN.
    """
    # GH 17093
    xfail_reasons = {
        "corrwith": "corrwith not implemented for SeriesGroupBy",
        "ngroup": "ngroup fails",
    }
    if reduction_func in xfail_reasons:
        request.node.add_marker(
            pytest.mark.xfail(reason=xfail_reasons[reduction_func])
        )

    # A non-default index lets the test confirm that transform preserves it
    index = [1, 2, 3, 4]
    keys = [1, 1, np.nan, np.nan]
    ser = Series([1, 2, 2, 3], index=index)
    gb = ser.groupby(keys, dropna=dropna)

    # Reducers that need a positional argument; everything else takes none
    args = {"corrwith": (ser,), "nth": (0,)}.get(reduction_func, ())

    # Reducers that don't fit the generic pattern get hand-written values
    manual_values = {
        "first": [1, 1, 2, 2],
        "last": [2, 2, 3, 3],
        "nth": [1, 1, 2, 2],
        "size": [2, 2, 2, 2],
        "corrwith": [1, 1, 2, 2],
    }
    if reduction_func in manual_values:
        expected = Series(manual_values[reduction_func], index=index)
    else:
        # Generic pattern: reduce each group (nulls kept) and broadcast the
        # scalar over that group's rows
        pieces = [
            Series(getattr(group, reduction_func)(), index=group.index)
            for _, group in ser.groupby(keys, dropna=False)
        ]
        expected = concat(pieces)

    if dropna:
        target_dtype = object if reduction_func in ("any", "all") else float
        expected = expected.astype(target_dtype)
        # Rows at positions 2 and 3 belong to the null group
        expected.iloc[[2, 3]] = np.nan

    result = gb.transform(reduction_func, *args)
    tm.assert_series_equal(result, expected)
1464+
1465+
def test_null_group_str_transformer_series(request, dropna, transformation_func):
    """Series variant: ``transform`` with a string transformer and a null key.

    The expectation is assembled by applying the transformer to each group
    separately and concatenating the pieces.  When ``dropna`` is true, a
    single NaN entry at index label 3 is appended to the pieces.
    """
    # GH 17093
    nan_sensitive = ("cummax", "cummin", "cumsum", "fillna", "rank")
    if transformation_func == "tshift":
        request.node.add_marker(
            pytest.mark.xfail(reason="tshift requires timeseries")
        )
    elif dropna and transformation_func in nan_sensitive:
        request.node.add_marker(
            pytest.mark.xfail(
                reason="produces incorrect results when nans are present"
            )
        )

    args = (0,) if transformation_func == "fillna" else ()
    ser = Series([1, 2, 2], index=[1, 2, 3])
    gb = ser.groupby([1, 1, np.nan], dropna=dropna)

    def _expected_piece(position, group):
        # Build the expected transform output for one group
        if transformation_func == "cumcount":
            # Series has no cumcount method
            return Series(range(len(group)), index=group.index)
        if transformation_func == "ngroup":
            return Series(position, index=group.index)
        return getattr(group, transformation_func)(*args)

    pieces = [_expected_piece(k, group) for k, (_, group) in enumerate(gb)]
    if dropna:
        null_dtype = object if transformation_func in ("any", "all") else None
        pieces.append(Series([np.nan], index=[3], dtype=null_dtype))
    expected = concat(pieces)

    if transformation_func in ("backfill", "pad"):
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(
        warn, match=f"{transformation_func} is deprecated"
    ):
        result = gb.transform(transformation_func, *args)
    tm.assert_equal(result, expected)
0 commit comments