11# -*- coding: utf-8 -*-
22
3- import warnings
4- from itertools import product
5-
63import numpy as np
74import pandas as pd
85import pandas .util .testing as tm
96import pytest
107from pandas import (DataFrame , DatetimeIndex , Float64Index , Index , Int64Index ,
118 MultiIndex , PeriodIndex , TimedeltaIndex , UInt64Index ,
129 compat , date_range , period_range )
13- from pandas .compat import lrange , range , u
10+ from pandas .compat import lrange , range
1411from pandas .core .dtypes .dtypes import CategoricalDtype
1512from pandas .core .indexes .datetimelike import DatetimeIndexOpsMixin
1613from pandas .util .testing import assert_copy
@@ -385,10 +382,6 @@ def test_sub(idx):
385382 first .tolist () - idx [- 3 :]
386383
387384
388- def test_nlevels (idx ):
389- assert idx .nlevels == 2
390-
391-
392385def test_argsort (idx ):
393386 result = idx .argsort ()
394387 expected = idx .values .argsort ()
@@ -410,249 +403,6 @@ def test_remove_unused_nan(level0, level1):
410403 assert ('unused' not in result .levels [level ])
411404
412405
413- @pytest .mark .parametrize ('names' , [None , ['first' , 'second' ]])
414- def test_unique (names ):
415- mi = pd .MultiIndex .from_arrays ([[1 , 2 , 1 , 2 ], [1 , 1 , 1 , 2 ]],
416- names = names )
417-
418- res = mi .unique ()
419- exp = pd .MultiIndex .from_arrays ([[1 , 2 , 2 ], [1 , 1 , 2 ]], names = mi .names )
420- tm .assert_index_equal (res , exp )
421-
422- mi = pd .MultiIndex .from_arrays ([list ('aaaa' ), list ('abab' )],
423- names = names )
424- res = mi .unique ()
425- exp = pd .MultiIndex .from_arrays ([list ('aa' ), list ('ab' )],
426- names = mi .names )
427- tm .assert_index_equal (res , exp )
428-
429- mi = pd .MultiIndex .from_arrays ([list ('aaaa' ), list ('aaaa' )],
430- names = names )
431- res = mi .unique ()
432- exp = pd .MultiIndex .from_arrays ([['a' ], ['a' ]], names = mi .names )
433- tm .assert_index_equal (res , exp )
434-
435- # GH #20568 - empty MI
436- mi = pd .MultiIndex .from_arrays ([[], []], names = names )
437- res = mi .unique ()
438- tm .assert_index_equal (mi , res )
439-
440-
441- def test_unique_datetimelike ():
442- idx1 = pd .DatetimeIndex (['2015-01-01' , '2015-01-01' , '2015-01-01' ,
443- '2015-01-01' , 'NaT' , 'NaT' ])
444- idx2 = pd .DatetimeIndex (['2015-01-01' , '2015-01-01' , '2015-01-02' ,
445- '2015-01-02' , 'NaT' , '2015-01-01' ],
446- tz = 'Asia/Tokyo' )
447- result = pd .MultiIndex .from_arrays ([idx1 , idx2 ]).unique ()
448-
449- eidx1 = pd .DatetimeIndex (['2015-01-01' , '2015-01-01' , 'NaT' , 'NaT' ])
450- eidx2 = pd .DatetimeIndex (['2015-01-01' , '2015-01-02' ,
451- 'NaT' , '2015-01-01' ],
452- tz = 'Asia/Tokyo' )
453- exp = pd .MultiIndex .from_arrays ([eidx1 , eidx2 ])
454- tm .assert_index_equal (result , exp )
455-
456-
457- @pytest .mark .parametrize ('level' , [0 , 'first' , 1 , 'second' ])
458- def test_unique_level (idx , level ):
459- # GH #17896 - with level= argument
460- result = idx .unique (level = level )
461- expected = idx .get_level_values (level ).unique ()
462- tm .assert_index_equal (result , expected )
463-
464- # With already unique level
465- mi = pd .MultiIndex .from_arrays ([[1 , 3 , 2 , 4 ], [1 , 3 , 2 , 5 ]],
466- names = ['first' , 'second' ])
467- result = mi .unique (level = level )
468- expected = mi .get_level_values (level )
469- tm .assert_index_equal (result , expected )
470-
471- # With empty MI
472- mi = pd .MultiIndex .from_arrays ([[], []], names = ['first' , 'second' ])
473- result = mi .unique (level = level )
474- expected = mi .get_level_values (level )
475-
476-
477- def test_multiindex_compare ():
478- # GH 21149
479- # Ensure comparison operations for MultiIndex with nlevels == 1
480- # behave consistently with those for MultiIndex with nlevels > 1
481-
482- midx = pd .MultiIndex .from_product ([[0 , 1 ]])
483-
484- # Equality self-test: MultiIndex object vs self
485- expected = pd .Series ([True , True ])
486- result = pd .Series (midx == midx )
487- tm .assert_series_equal (result , expected )
488-
489- # Greater than comparison: MultiIndex object vs self
490- expected = pd .Series ([False , False ])
491- result = pd .Series (midx > midx )
492- tm .assert_series_equal (result , expected )
493-
494-
495- def test_duplicate_multiindex_labels ():
496- # GH 17464
497- # Make sure that a MultiIndex with duplicate levels throws a ValueError
498- with pytest .raises (ValueError ):
499- ind = pd .MultiIndex ([['A' ] * 10 , range (10 )], [[0 ] * 10 , range (10 )])
500-
501- # And that using set_levels with duplicate levels fails
502- ind = MultiIndex .from_arrays ([['A' , 'A' , 'B' , 'B' , 'B' ],
503- [1 , 2 , 1 , 2 , 3 ]])
504- with pytest .raises (ValueError ):
505- ind .set_levels ([['A' , 'B' , 'A' , 'A' , 'B' ], [2 , 1 , 3 , - 2 , 5 ]],
506- inplace = True )
507-
508-
509- @pytest .mark .parametrize ('names' , [['a' , 'b' , 'a' ], ['1' , '1' , '2' ],
510- ['1' , 'a' , '1' ]])
511- def test_duplicate_level_names (names ):
512- # GH18872
513- pytest .raises (ValueError , pd .MultiIndex .from_product ,
514- [[0 , 1 ]] * 3 , names = names )
515-
516- # With .rename()
517- mi = pd .MultiIndex .from_product ([[0 , 1 ]] * 3 )
518- tm .assert_raises_regex (ValueError , "Duplicated level name:" ,
519- mi .rename , names )
520-
521- # With .rename(., level=)
522- mi .rename (names [0 ], level = 1 , inplace = True )
523- tm .assert_raises_regex (ValueError , "Duplicated level name:" ,
524- mi .rename , names [:2 ], level = [0 , 2 ])
525-
526-
527- def test_duplicate_meta_data ():
528- # GH 10115
529- index = MultiIndex (
530- levels = [[0 , 1 ], [0 , 1 , 2 ]],
531- labels = [[0 , 0 , 0 , 0 , 1 , 1 , 1 ],
532- [0 , 1 , 2 , 0 , 0 , 1 , 2 ]])
533-
534- for idx in [index ,
535- index .set_names ([None , None ]),
536- index .set_names ([None , 'Num' ]),
537- index .set_names (['Upper' , 'Num' ]), ]:
538- assert idx .has_duplicates
539- assert idx .drop_duplicates ().names == idx .names
540-
541-
542- def test_duplicates (idx ):
543- assert not idx .has_duplicates
544- assert idx .append (idx ).has_duplicates
545-
546- index = MultiIndex (levels = [[0 , 1 ], [0 , 1 , 2 ]], labels = [
547- [0 , 0 , 0 , 0 , 1 , 1 , 1 ], [0 , 1 , 2 , 0 , 0 , 1 , 2 ]])
548- assert index .has_duplicates
549-
550- # GH 9075
551- t = [(u ('x' ), u ('out' ), u ('z' ), 5 , u ('y' ), u ('in' ), u ('z' ), 169 ),
552- (u ('x' ), u ('out' ), u ('z' ), 7 , u ('y' ), u ('in' ), u ('z' ), 119 ),
553- (u ('x' ), u ('out' ), u ('z' ), 9 , u ('y' ), u ('in' ), u ('z' ), 135 ),
554- (u ('x' ), u ('out' ), u ('z' ), 13 , u ('y' ), u ('in' ), u ('z' ), 145 ),
555- (u ('x' ), u ('out' ), u ('z' ), 14 , u ('y' ), u ('in' ), u ('z' ), 158 ),
556- (u ('x' ), u ('out' ), u ('z' ), 16 , u ('y' ), u ('in' ), u ('z' ), 122 ),
557- (u ('x' ), u ('out' ), u ('z' ), 17 , u ('y' ), u ('in' ), u ('z' ), 160 ),
558- (u ('x' ), u ('out' ), u ('z' ), 18 , u ('y' ), u ('in' ), u ('z' ), 180 ),
559- (u ('x' ), u ('out' ), u ('z' ), 20 , u ('y' ), u ('in' ), u ('z' ), 143 ),
560- (u ('x' ), u ('out' ), u ('z' ), 21 , u ('y' ), u ('in' ), u ('z' ), 128 ),
561- (u ('x' ), u ('out' ), u ('z' ), 22 , u ('y' ), u ('in' ), u ('z' ), 129 ),
562- (u ('x' ), u ('out' ), u ('z' ), 25 , u ('y' ), u ('in' ), u ('z' ), 111 ),
563- (u ('x' ), u ('out' ), u ('z' ), 28 , u ('y' ), u ('in' ), u ('z' ), 114 ),
564- (u ('x' ), u ('out' ), u ('z' ), 29 , u ('y' ), u ('in' ), u ('z' ), 121 ),
565- (u ('x' ), u ('out' ), u ('z' ), 31 , u ('y' ), u ('in' ), u ('z' ), 126 ),
566- (u ('x' ), u ('out' ), u ('z' ), 32 , u ('y' ), u ('in' ), u ('z' ), 155 ),
567- (u ('x' ), u ('out' ), u ('z' ), 33 , u ('y' ), u ('in' ), u ('z' ), 123 ),
568- (u ('x' ), u ('out' ), u ('z' ), 12 , u ('y' ), u ('in' ), u ('z' ), 144 )]
569-
570- index = pd .MultiIndex .from_tuples (t )
571- assert not index .has_duplicates
572-
573- # handle int64 overflow if possible
574- def check (nlevels , with_nulls ):
575- labels = np .tile (np .arange (500 ), 2 )
576- level = np .arange (500 )
577-
578- if with_nulls : # inject some null values
579- labels [500 ] = - 1 # common nan value
580- labels = [labels .copy () for i in range (nlevels )]
581- for i in range (nlevels ):
582- labels [i ][500 + i - nlevels // 2 ] = - 1
583-
584- labels += [np .array ([- 1 , 1 ]).repeat (500 )]
585- else :
586- labels = [labels ] * nlevels + [np .arange (2 ).repeat (500 )]
587-
588- levels = [level ] * nlevels + [[0 , 1 ]]
589-
590- # no dups
591- index = MultiIndex (levels = levels , labels = labels )
592- assert not index .has_duplicates
593-
594- # with a dup
595- if with_nulls :
596- def f (a ):
597- return np .insert (a , 1000 , a [0 ])
598- labels = list (map (f , labels ))
599- index = MultiIndex (levels = levels , labels = labels )
600- else :
601- values = index .values .tolist ()
602- index = MultiIndex .from_tuples (values + [values [0 ]])
603-
604- assert index .has_duplicates
605-
606- # no overflow
607- check (4 , False )
608- check (4 , True )
609-
610- # overflow possible
611- check (8 , False )
612- check (8 , True )
613-
614- # GH 9125
615- n , k = 200 , 5000
616- levels = [np .arange (n ), tm .makeStringIndex (n ), 1000 + np .arange (n )]
617- labels = [np .random .choice (n , k * n ) for lev in levels ]
618- mi = MultiIndex (levels = levels , labels = labels )
619-
620- for keep in ['first' , 'last' , False ]:
621- left = mi .duplicated (keep = keep )
622- right = pd ._libs .hashtable .duplicated_object (mi .values , keep = keep )
623- tm .assert_numpy_array_equal (left , right )
624-
625- # GH5873
626- for a in [101 , 102 ]:
627- mi = MultiIndex .from_arrays ([[101 , a ], [3.5 , np .nan ]])
628- assert not mi .has_duplicates
629-
630- with warnings .catch_warnings (record = True ):
631- # Deprecated - see GH20239
632- assert mi .get_duplicates ().equals (MultiIndex .from_arrays (
633- [[], []]))
634-
635- tm .assert_numpy_array_equal (mi .duplicated (), np .zeros (
636- 2 , dtype = 'bool' ))
637-
638- for n in range (1 , 6 ): # 1st level shape
639- for m in range (1 , 5 ): # 2nd level shape
640- # all possible unique combinations, including nan
641- lab = product (range (- 1 , n ), range (- 1 , m ))
642- mi = MultiIndex (levels = [list ('abcde' )[:n ], list ('WXYZ' )[:m ]],
643- labels = np .random .permutation (list (lab )).T )
644- assert len (mi ) == (n + 1 ) * (m + 1 )
645- assert not mi .has_duplicates
646-
647- with warnings .catch_warnings (record = True ):
648- # Deprecated - see GH20239
649- assert mi .get_duplicates ().equals (MultiIndex .from_arrays (
650- [[], []]))
651-
652- tm .assert_numpy_array_equal (mi .duplicated (), np .zeros (
653- len (mi ), dtype = 'bool' ))
654-
655-
656406def test_map (idx ):
657407 # callable
658408 index = idx
0 commit comments