@@ -560,6 +560,74 @@ def test_unstack_dtypes(self):
560560 assert left .shape == (3 , 2 )
561561 tm .assert_frame_equal (left , right )
562562
def test_unstack_unused_levels(self):
    """Check ``unstack()`` on MultiIndexes whose levels have unused entries.

    Regression test for GH 17845 (unused labels in the index made
    ``unstack()`` cast int to float). Three set-ups are exercised: a
    sliced product index, an index with unused items on both levels, and
    an index with mixed-dtype levels plus missing (-1) labels.
    """
    # GH 17845: unused labels in index make unstack() cast int to float
    sliced_idx = pd.MultiIndex.from_product(
        [['a'], ['A', 'B', 'C', 'D']])[:-1]
    frame = pd.DataFrame([[1, 0]] * 3, index=sliced_idx)

    unstacked = frame.unstack()
    wanted_cols = pd.MultiIndex.from_product([[0, 1], ['A', 'B', 'C']])
    wanted = pd.DataFrame([[1, 1, 1, 0, 0, 0]],
                          index=['a'],
                          columns=wanted_cols)
    tm.assert_frame_equal(unstacked, wanted)
    # The unstacked column level is compared against the source index level.
    assert (unstacked.columns.levels[1] == sliced_idx.levels[1]).all()

    # Unused items on both levels
    level_values = [[0, 1, 7], [0, 1, 2, 3]]
    level_codes = [[0, 0, 1, 1], [0, 2, 0, 2]]
    sparse_idx = pd.MultiIndex(level_values, level_codes)
    quad = np.arange(4).reshape(2, 2)
    frame = pd.DataFrame(np.concatenate([quad, quad + 4]),
                         index=sparse_idx)
    unstacked = frame.unstack()
    wanted_values = np.concatenate([quad * 2, quad * 2 + 1], axis=1)
    wanted = pd.DataFrame(wanted_values, columns=sparse_idx)
    tm.assert_frame_equal(unstacked, wanted)
    assert (unstacked.columns.levels[1] == sparse_idx.levels[1]).all()

    # With mixed dtype and NaN
    level_values = [['a', 2, 'c'], [1, 3, 5, 7]]
    level_codes = [[0, -1, 1, 1], [0, 2, -1, 2]]
    mixed_idx = pd.MultiIndex(level_values, level_codes)
    values = np.arange(8)
    frame = pd.DataFrame(values.reshape(4, 2), index=mixed_idx)

    # Each scenario holds: the level to unstack, the flat positions the
    # eight data values occupy in the 3x6 expected grid, the expected
    # inner column level, and the expected row index.
    scenarios = (
        (0, [13, 16, 6, 9, 2, 5, 8, 11],
         [np.nan, 'a', 2], [np.nan, 5, 1]),
        (1, [8, 11, 1, 4, 12, 15, 13, 16],
         [np.nan, 5, 1], [np.nan, 'a', 2]),
    )
    for level, positions, inner_cols, row_labels in scenarios:
        unstacked = frame.unstack(level=level)

        # Start from an all-NaN grid and drop the data into place.
        flat = np.full(18, np.nan)
        flat[positions] = values
        wanted = pd.DataFrame(
            flat.reshape(3, 6),
            index=row_labels,
            columns=pd.MultiIndex.from_product([[0, 1], inner_cols]))

        # Broken (GH 18455):
        # tm.assert_frame_equal(unstacked, wanted)
        # so compare through the difference instead: it must sum to zero,
        # and adding one to every cell must sum to 8.
        delta = unstacked - wanted
        assert delta.sum().sum() == 0
        assert (delta + 1).sum().sum() == 8

        assert (unstacked.columns.levels[1] ==
                mixed_idx.levels[level]).all()
613+
@pytest.mark.parametrize("cols", [['A', 'C'], slice(None)])
def test_unstack_unused_level(self, cols):
    """Unstack a ``.loc`` selection that keeps only the 'I' rows of level C.

    Regression test for GH 18562. Runs once with an explicit column list
    and once with ``slice(None)`` as the column selector.
    """
    # GH 18562 : unused labels on the unstacked level
    frame = pd.DataFrame(
        [[2010, 'a', 'I'],
         [2011, 'b', 'II']],
        columns=['A', 'B', 'C'])

    # drop=False keeps A, B and C as ordinary columns as well as index levels.
    indexed = frame.set_index(['A', 'B', 'C'], drop=False)
    row_key = (slice(None), slice(None), 'I')
    result = indexed.loc[row_key, cols].unstack()

    # Build the expectation from the first row: 'C' moves from the row
    # index into a second column level whose only value is 'I'.
    expected = indexed.iloc[[0]][cols]
    stacked_cols = MultiIndex.from_product([expected.columns, ['I']],
                                           names=[None, 'C'])
    expected.columns = stacked_cols
    expected.index = expected.index.droplevel('C')
    tm.assert_frame_equal(result, expected)
630+
563631 def test_unstack_nan_index (self ): # GH7466
564632 cast = lambda val : '{0:1}' .format ('' if val != val else val )
565633 nan = np .nan
0 commit comments