@@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error():
18581858 Categorical ([0 ]),
18591859 [to_datetime (0 )],
18601860 date_range (0 , 1 , 1 , tz = "US/Eastern" ),
1861+ pd .period_range ("2016-01-01" , periods = 3 , freq = "D" ),
18611862 pd .array ([0 ], dtype = "Int64" ),
18621863 pd .array ([0 ], dtype = "Float64" ),
18631864 pd .array ([False ], dtype = "boolean" ),
@@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error():
18701871 "cat" ,
18711872 "dt64" ,
18721873 "dt64tz" ,
1874+ "period" ,
18731875 "Int64" ,
18741876 "Float64" ,
18751877 "boolean" ,
@@ -1886,13 +1888,6 @@ def test_empty_groupby(
18861888 override_dtype = None
18871889
18881890 if (
1889- isinstance (values , Categorical )
1890- and not isinstance (columns , list )
1891- and op in ["sum" , "prod" , "skew" ]
1892- ):
1893- # handled below GH#41291
1894- pass
1895- elif (
18961891 isinstance (values , Categorical )
18971892 and len (keys ) == 1
18981893 and op in ["idxmax" , "idxmin" ]
@@ -1901,18 +1896,8 @@ def test_empty_groupby(
19011896 raises = ValueError , match = "attempt to get arg(min|max) of an empty sequence"
19021897 )
19031898 request .node .add_marker (mark )
1904- elif isinstance (values , Categorical ) and len (keys ) == 1 and op in ["sum" , "prod" ]:
1905- mark = pytest .mark .xfail (
1906- raises = AssertionError , match = "(DataFrame|Series) are different"
1907- )
1908- request .node .add_marker (mark )
1909- elif isinstance (values , Categorical ) and len (keys ) == 2 and op in ["sum" ]:
1910- mark = pytest .mark .xfail (
1911- raises = AssertionError , match = "(DataFrame|Series) are different"
1912- )
1913- request .node .add_marker (mark )
19141899
1915- elif isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1900+ if isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
19161901 # We expect to get Int64 back for these
19171902 override_dtype = "Int64"
19181903
@@ -1936,6 +1921,26 @@ def get_result(**kwargs):
19361921 else :
19371922 return getattr (gb , method )(op , ** kwargs )
19381923
def get_categorical_invalid_expected():
    """
    Build the expected frame when the Categorical value column is dropped.

    Without ``observed=True`` the groupby result keeps a NaN entry for the
    unobserved group, so the expected frame has no columns but still carries
    an index built from the single category.  Had ``observed=True`` been
    passed to groupby, the expected result would simply be
    ``df.set_index(keys)[columns]``.
    """
    level = Categorical([0], dtype=values.dtype)
    if len(keys) == 1:
        # every column is dropped, yet we still end up with one row
        index = Index(level, name=keys[0])
    else:
        index = MultiIndex.from_product([level, level], names=keys)

    return DataFrame([], columns=[], index=index)
1939+
1940+ is_per = isinstance (df .dtypes [0 ], pd .PeriodDtype )
1941+ is_dt64 = df .dtypes [0 ].kind == "M"
1942+ is_cat = isinstance (values , Categorical )
1943+
19391944 if isinstance (values , Categorical ) and not values .ordered and op in ["min" , "max" ]:
19401945 msg = f"Cannot perform { op } with non-ordered Categorical"
19411946 with pytest .raises (TypeError , match = msg ):
@@ -1944,105 +1949,47 @@ def get_result(**kwargs):
19441949 if isinstance (columns , list ):
19451950 # i.e. DataframeGroupBy, not SeriesGroupBy
19461951 result = get_result (numeric_only = True )
1947-
1948- # Categorical is special without 'observed=True', we get an NaN entry
1949- # corresponding to the unobserved group. If we passed observed=True
1950- # to groupby, expected would just be 'df.set_index(keys)[columns]'
1951- # as below
1952- lev = Categorical ([0 ], dtype = values .dtype )
1953- if len (keys ) != 1 :
1954- idx = MultiIndex .from_product ([lev , lev ], names = keys )
1955- else :
1956- # all columns are dropped, but we end up with one row
1957- # Categorical is special without 'observed=True'
1958- idx = Index (lev , name = keys [0 ])
1959-
1960- expected = DataFrame ([], columns = [], index = idx )
1952+ expected = get_categorical_invalid_expected ()
19611953 tm .assert_equal (result , expected )
19621954 return
19631955
1964- if columns == "C" :
1965- # i.e. SeriesGroupBy
1966- if op in ["prod" , "sum" , "skew" ]:
1967- # ops that require more than just ordered-ness
1968- if df .dtypes [0 ].kind == "M" :
1969- # GH#41291
1970- # datetime64 -> prod and sum are invalid
1971- if op == "skew" :
1972- msg = "does not support reduction 'skew'"
1973- else :
1974- msg = "datetime64 type does not support"
1975- with pytest .raises (TypeError , match = msg ):
1976- get_result ()
1977-
1978- return
1979- if op in ["prod" , "sum" , "skew" ]:
1980- if isinstance (values , Categorical ):
1981- # GH#41291
1982- if op == "skew" :
1983- msg = f"does not support reduction '{ op } '"
1984- else :
1985- msg = "category type does not support"
1986- with pytest .raises (TypeError , match = msg ):
1987- get_result ()
1956+ if op in ["prod" , "sum" , "skew" ]:
1957+ # ops that require more than just ordered-ness
1958+ if is_dt64 or is_cat or is_per :
1959+ # GH#41291
1960+ # datetime64 -> prod and sum are invalid
1961+ if op == "skew" :
1962+ msg = "does not support reduction 'skew'"
1963+ elif is_dt64 :
1964+ msg = "datetime64 type does not support"
1965+ elif is_per :
1966+ msg = "Period type does not support"
1967+ else :
1968+ msg = "category type does not support"
1969+ with pytest .raises (TypeError , match = msg ):
1970+ get_result ()
19881971
1972+ if not isinstance (columns , list ):
1973+ # i.e. SeriesGroupBy
19891974 return
1990- else :
1991- # ie. DataFrameGroupBy
1992- if op in ["prod" , "sum" ]:
1993- # ops that require more than just ordered-ness
1994- if df .dtypes [0 ].kind == "M" :
1995- # GH#41291
1996- # datetime64 -> prod and sum are invalid
1997- with pytest .raises (TypeError , match = "datetime64 type does not support" ):
1998- get_result ()
1999- result = get_result (numeric_only = True )
2000-
2001- # with numeric_only=True, these are dropped, and we get
2002- # an empty DataFrame back
2003- expected = df .set_index (keys )[[]]
2004- tm .assert_equal (result , expected )
1975+ elif op == "skew" :
1976+ # TODO: test the numeric_only=True case
20051977 return
2006-
2007- elif isinstance (values , Categorical ):
1978+ else :
1979+ # i.e. op in ["prod", "sum"]:
1980+ # i.e. DataFrameGroupBy
1981+ # ops that require more than just ordered-ness
20081982 # GH#41291
2009- # Categorical doesn't implement sum or prod
2010- with pytest .raises (TypeError , match = "category type does not support" ):
2011- get_result ()
20121983 result = get_result (numeric_only = True )
20131984
20141985 # with numeric_only=True, these are dropped, and we get
20151986 # an empty DataFrame back
20161987 expected = df .set_index (keys )[[]]
2017- if len (keys ) != 1 and op == "prod" :
2018- # TODO: why just prod and not sum?
2019- # Categorical is special without 'observed=True'
2020- lev = Categorical ([0 ], dtype = values .dtype )
2021- mi = MultiIndex .from_product ([lev , lev ], names = ["A" , "B" ])
2022- expected = DataFrame ([], columns = [], index = mi )
2023-
2024- tm .assert_equal (result , expected )
2025- return
2026-
2027- elif df .dtypes [0 ] == object :
2028- result = get_result ()
2029- expected = df .set_index (keys )[["C" ]]
1988+ if is_cat :
1989+ expected = get_categorical_invalid_expected ()
20301990 tm .assert_equal (result , expected )
20311991 return
20321992
2033- if op == "skew" and (
2034- isinstance (values , Categorical ) or df .dtypes [0 ].kind == "M"
2035- ):
2036- msg = "|" .join (
2037- [
2038- "Categorical is not ordered" ,
2039- "does not support reduction" ,
2040- ]
2041- )
2042- with pytest .raises (TypeError , match = msg ):
2043- get_result ()
2044- return
2045-
20461993 result = get_result ()
20471994 expected = df .set_index (keys )[columns ]
20481995 if override_dtype is not None :
0 commit comments