1- from .pandas_vb_common import *
2- from pandas import melt , wide_to_long
1+ from itertools import product
32
3+ import numpy as np
4+ from pandas import DataFrame , MultiIndex , date_range , melt , wide_to_long
5+
6+ from .pandas_vb_common import setup # noqa
7+
8+
class Melt(object):
    """Benchmark ``melt`` on a frame with three value and two id columns."""

    goal_time = 0.2

    def setup(self):
        """Build the 10000-row input frame.

        Columns ``A``, ``B`` and ``C`` hold standard-normal floats;
        ``id1`` holds integers in ``[0, 10)`` and ``id2`` integers in
        ``[100, 1000)``.
        """
        self.df = DataFrame(np.random.randn(10000, 3),
                            columns=['A', 'B', 'C'])
        self.df['id1'] = np.random.randint(0, 10, 10000)
        self.df['id2'] = np.random.randint(100, 1000, 10000)

    def time_melt_dataframe(self):
        """Time melting the value columns while keeping both id columns."""
        melt(self.df, id_vars=['id1', 'id2'])
1720
1821
class Pivot(object):
    """Benchmark ``DataFrame.pivot`` on long-format hourly data."""

    goal_time = 0.2

    def setup(self):
        """Build a long-format frame of 50 variables x 10000 timestamps.

        Each of the 50 ``variable`` values is paired with every hourly
        ``date`` exactly once, so the (date, variable) pairs are unique
        and the frame can be pivoted without aggregation.
        """
        N = 10000
        index = date_range('1/1/2000', periods=N, freq='h')
        data = {'value': np.random.randn(N * 50),
                'variable': np.arange(50).repeat(N),
                'date': np.tile(index.values, 50)}
        self.df = DataFrame(data)

    def time_reshape_pivot_time_series(self):
        """Time pivoting the long frame into a date x variable table."""
        # Keyword arguments: recent pandas releases no longer accept
        # index/columns/values positionally.
        self.df.pivot(index='date', columns='variable', values='value')
3737
class SimpleReshape(object):
    """Benchmark ``stack``/``unstack`` on a small two-level indexed frame."""

    goal_time = 0.2

    def setup(self):
        """Build a 10000 x 4 frame and its unstacked counterpart.

        The first index level repeats each of 0..99 one hundred times;
        the second level is 0..99 tiled one hundred times and rolled by
        25 positions. ``udf`` is ``df`` with level 1 moved to the columns.
        """
        arrays = [np.arange(100).repeat(100),
                  np.roll(np.tile(np.arange(100), 100), 25)]
        index = MultiIndex.from_arrays(arrays)
        self.df = DataFrame(np.random.randn(10000, 4), index=index)
        self.udf = self.df.unstack(1)

    def time_stack(self):
        """Time stacking the pre-unstacked frame."""
        self.udf.stack()

    def time_unstack(self):
        """Time unstacking index level 1 of the original frame."""
        self.df.unstack(1)
6054
6155
class Unstack(object):
    """Benchmark ``unstack`` on a large single-dtype frame."""

    goal_time = 0.2

    def setup(self):
        """Build a 10000 x 1000 integer frame and a copy missing one row.

        The index is the full product of two levels of 0..99. ``df2``
        drops the final row, so its index is no longer a complete
        product of the two levels.
        """
        m = 100
        n = 1000

        levels = np.arange(m)
        index = MultiIndex.from_product([levels] * 2)
        columns = np.arange(n)
        values = np.arange(m * m * n).reshape(m * m, n)
        self.df = DataFrame(values, index, columns)
        self.df2 = self.df.iloc[:-1]

    def time_full_product(self):
        """Time unstacking the frame whose index is a full product."""
        self.df.unstack()

    def time_without_last_row(self):
        """Time unstacking the frame with the last row removed."""
        self.df2.unstack()
8176
8277
class SparseIndex(object):
    """Benchmark ``unstack`` on a five-level index over a sparse key space."""

    goal_time = 0.2

    def setup(self):
        """Build a 1000-row frame indexed by five random integer columns.

        ``unstack`` raises on a duplicated index, and random keys can
        occasionally collide, so the frame is redrawn (at most ten
        attempts) until the index is unique. When the first draw is
        already unique, the result is identical to a single draw.
        """
        NUM_ROWS = 1000
        for _ in range(10):
            frame = DataFrame(
                {'A': np.random.randint(50, size=NUM_ROWS),
                 'B': np.random.randint(50, size=NUM_ROWS),
                 'C': np.random.randint(-10, 10, size=NUM_ROWS),
                 'D': np.random.randint(-10, 10, size=NUM_ROWS),
                 'E': np.random.randint(10, size=NUM_ROWS),
                 'F': np.random.randn(NUM_ROWS)})
            frame = frame.set_index(['A', 'B', 'C', 'D', 'E'])
            if frame.index.is_unique:
                break
        self.df = frame

    def time_unstack(self):
        """Time unstacking the innermost index level."""
        self.df.unstack()
9895
class WideToLong(object):
    """Benchmark ``wide_to_long`` on a wide frame of yearly stub columns."""

    goal_time = 0.2

    def setup(self):
        """Build a 5000-row frame with 20 id columns and 80 stub columns.

        Stub columns are named ``A1``..``A20`` through ``D1``..``D20``.
        The 20 remaining columns are labelled with the strings
        ``'0'``..``'19'`` so that every column label is a string, which
        the stub-name matching in ``wide_to_long`` can process. An
        ``id`` column mirroring the row index is added for use as ``i``.
        """
        nyrs = 20
        nidvars = 20
        N = 5000
        self.letters = list('ABCD')
        yrvars = [letter + str(num)
                  for letter, num in product(self.letters,
                                             range(1, nyrs + 1))]
        columns = [str(i) for i in range(nidvars)] + yrvars

        self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)),
                            columns=columns)
        self.df['id'] = self.df.index

    def time_wide_to_long_big(self):
        """Time reshaping the stub columns into long format by year."""
        wide_to_long(self.df, self.letters, i='id', j='year')
120114
121115
122116class PivotTable (object ):
117+
123118 goal_time = 0.2
124119
125120 def setup (self ):
0 commit comments