@@ -48,23 +48,23 @@ class _Unstacker(object):
4848 >>> import pandas as pd
4949 >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
5050 ... ('two', 'a'), ('two', 'b')])
51- >>> s = pd.Series(np.arange(1.0 , 5.0 ), index=index)
51+ >>> s = pd.Series(np.arange(1, 5, dtype=np.int64 ), index=index)
5252 >>> s
53- one a 1
54- b 2
55- two a 3
56- b 4
57- dtype: float64
53+ one a 1
54+ b 2
55+ two a 3
56+ b 4
57+ dtype: int64
5858
5959 >>> s.unstack(level=-1)
60- a b
60+ a b
6161 one 1 2
6262 two 3 4
6363
6464 >>> s.unstack(level=0)
6565 one two
66- a 1 2
67- b 3 4
66+ a 1 3
67+ b 2 4
6868
6969 Returns
7070 -------
@@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None):
789789 >>> import pandas as pd
790790 >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
791791 ... 'team': ['Red Sox', 'Yankees'],
792- ... 'year1': [2007, 2008 ], 'year2': [2008, 2008]})
792+ ... 'year1': [2007, 2007 ], 'year2': [2008, 2008]})
793793 >>> data
794794 hr1 hr2 team year1 year2
795795 0 514 545 Red Sox 2007 2008
796796 1 573 526 Yankees 2007 2008
797797
798798 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
799- team hr year
800- 0 Red Sox 514 2007
801- 1 Yankees 573 2007
802- 2 Red Sox 545 2008
803- 3 Yankees 526 2008
799+ team year hr
800+ 0 Red Sox 2007 514
801+ 1 Yankees 2007 573
802+ 2 Red Sox 2008 545
803+ 3 Yankees 2008 526
804804
805805 Returns
806806 -------
@@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
905905 ... })
906906 >>> df["id"] = df.index
907907 >>> df
908- A1970 A1980 B1970 B1980 X id
908+ A1970 A1980 B1970 B1980 X id
909909 0 a d 2.5 3.2 -1.085631 0
910910 1 b e 1.2 1.3 0.997345 1
911911 2 c f 0.7 0.1 0.282978 2
912912 >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
913+ ... # doctest: +NORMALIZE_WHITESPACE
913914 X A B
914915 id year
915916 0 1970 -1.085631 a 2.5
@@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
940941 8 3 3 2.1 2.9
941942 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
942943 >>> l
944+ ... # doctest: +NORMALIZE_WHITESPACE
943945 ht
944946 famid birth age
945947 1 1 1 2.8
@@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
979981
980982 Unwieldy column names are also handled
981983
984+ >>> np.random.seed(0)
982985 >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
983986 ... 'A(quarterly)-2011': np.random.rand(3),
984987 ... 'B(quarterly)-2010': np.random.rand(3),
985988 ... 'B(quarterly)-2011': np.random.rand(3),
986989 ... 'X' : np.random.randint(3, size=3)})
987990 >>> df['id'] = df.index
988- >>> df
989- A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011
990- 0 0.531828 0.724455 0.322959 0.293714
991- 1 0.634401 0.611024 0.361789 0.630976
992- 2 0.849432 0.722443 0.228263 0.092105
993- \
991+ >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
992+ A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ...
993+ 0 0.548814 0.544883 0.437587 ...
994+ 1 0.715189 0.423655 0.891773 ...
995+ 2 0.602763 0.645894 0.963663 ...
994996 X id
995997 0 0 0
996998 1 1 1
997- 2 2 2
998- >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'],
999- i='id', j='year', sep='-')
1000- X A(quarterly) B(quarterly)
999+ 2 1 2
1000+
1001+ >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
1002+ ... j='year', sep='-')
1003+ ... # doctest: +NORMALIZE_WHITESPACE
1004+ X A(quarterly) B(quarterly)
10011005 id year
1002- 0 2010 0 0.531828 0.322959
1003- 1 2010 2 0.634401 0.361789
1004- 2 2010 2 0.849432 0.228263
1005- 0 2011 0 0.724455 0.293714
1006- 1 2011 2 0.611024 0.630976
1007- 2 2011 2 0.722443 0.092105
1006+ 0 2010 0 0.548814 0.437587
1007+ 1 2010 1 0.715189 0.891773
1008+ 2 2010 1 0.602763 0.963663
1009+ 0 2011 0 0.544883 0.383442
1010+ 1 2011 1 0.423655 0.791725
1011+ 2 2011 1 0.645894 0.528895
10081012
10091013 If we have many columns, we could also use a regex to find our
10101014 stubnames and pass that list on to wide_to_long
10111015
1012- >>> stubnames = set([match[0] for match in
1013- df.columns.str.findall('[A-B]\(.*\)').values
1014- if match != [] ])
1016+ >>> stubnames = sorted(
1017+ ... set([match[0] for match in df.columns.str.findall(
1018+ ... r'[A-B]\(.*\)').values if match != [] ])
1019+ ... )
10151020 >>> list(stubnames)
1016- ['B (quarterly)', 'A (quarterly)']
1021+ ['A(quarterly)', 'B(quarterly)']
10171022
10181023 Notes
10191024 -----
@@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
11331138 2 0 0 1
11341139
11351140 >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
1136- 'C': [1, 2, 3]})
1141+ ... 'C': [1, 2, 3]})
11371142
11381143 >>> pd.get_dummies(df, prefix=['col1', 'col2'])
11391144 C col1_a col1_b col2_a col2_b col2_c
@@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
11491154 3 1 0 0
11501155 4 1 0 0
11511156
1152- >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True))
1157+ >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
11531158 b c
11541159 0 0 0
11551160 1 1 0
0 commit comments