1- import string
2-
31import numpy as np
42import pandas .util .testing as tm
53from pandas import (Series , DataFrame , MultiIndex , Int64Index , Float64Index ,
6- IntervalIndex , IndexSlice )
4+ IntervalIndex , IndexSlice , concat , date_range )
75from .pandas_vb_common import setup , Panel # noqa
86
97
@@ -79,27 +77,27 @@ class NonNumericSeriesIndexing(object):
7977 param_names = ['index' ]
8078
8179 def setup (self , index ):
82- N = 10 ** 6
80+ N = 10 ** 5
8381 indexes = {'string' : tm .makeStringIndex (N ),
84- 'datetime' : tm . makeTimeSeries ( N )}
82+ 'datetime' : date_range ( '1900' , periods = N , freq = 's' )}
8583 index = indexes [index ]
8684 self .s = Series (np .random .rand (N ), index = index )
87- self .lbl = index [800000 ]
85+ self .lbl = index [80000 ]
8886
89- def time_getitem_label_slice (self ):
87+ def time_getitem_label_slice (self , index ):
9088 self .s [:self .lbl ]
9189
92- def time_getitem_pos_slice (self ):
93- self .s [:800000 ]
90+ def time_getitem_pos_slice (self , index ):
91+ self .s [:80000 ]
9492
95- def time_get_value (self ):
93+ def time_get_value (self , index ):
9694 self .s .get_value (self .lbl )
9795
9896 def time_getitem_scalar (self , index ):
9997 self .s [self .lbl ]
10098
10199
102- class DataFrameIndexing (object ):
100+ class DataFrameStringIndexing (object ):
103101
104102 goal_time = 0.2
105103
@@ -108,67 +106,71 @@ def setup(self):
108106 columns = tm .makeStringIndex (30 )
109107 self .df = DataFrame (np .random .randn (1000 , 30 ), index = index ,
110108 columns = columns )
111- self .idx = index [100 ]
112- self .col = columns [10 ]
113-
114- self .df2 = DataFrame (np .random .randn (10000 , 4 ),
115- columns = ['A' , 'B' , 'C' , 'D' ])
116- self .indexer = self .df2 ['B' ] > 0
117- self .obj_indexer = self .indexer .astype ('O' )
118-
119- # dupes
120- self .idx_dupe = np .array (range (30 )) * 99
121- self .df3 = DataFrame ({'A' : [0.1 ] * 1000 , 'B' : [1 ] * 1000 })
122- self .df3 = concat ([self .df3 , 2 * self .df3 , 3 * self .df3 ])
123-
124- self .df_big = DataFrame (dict (A = ['foo' ] * 1000000 ))
109+ self .idx_scalar = index [100 ]
110+ self .col_scalar = columns [10 ]
111+ self .bool_indexer = self .df [self .col_scalar ] > 0
112+ self .bool_obj_indexer = self .bool_indexer .astype (object )
125113
126114 def time_get_value (self ):
127- self .df .get_value (self .idx , self .col )
115+ self .df .get_value (self .idx_scalar , self .col_scalar )
116+
117+ def time_ix (self ):
118+ self .df .ix [self .idx_scalar , self .col_scalar ]
128119
129- def time_get_value_ix (self ):
130- self .df .ix [( self .idx , self .col ) ]
120+ def time_loc (self ):
121+ self .df .loc [ self .idx_scalar , self .col_scalar ]
131122
132123 def time_getitem_scalar (self ):
133- self .df [self .col ][self .idx ]
124+ self .df [self .col_scalar ][self .idx_scalar ]
134125
135126 def time_boolean_rows (self ):
136- self .df2 [self .indexer ]
127+ self .df [self .bool_indexer ]
137128
138129 def time_boolean_rows_object (self ):
139- self .df2 [self .obj_indexer ]
130+ self .df [self .bool_obj_indexer ]
131+
132+
133+ class DataFrameNumericIndexing (object ):
134+
135+ goal_time = 0.2
136+
137+ def setup (self ):
138+ self .idx_dupe = np .array (range (30 )) * 99
139+ self .df = DataFrame (np .random .randn (10000 , 5 ))
140+ self .df_dup = concat ([self .df , 2 * self .df , 3 * self .df ])
141+ self .bool_indexer = [True ] * 5000 + [False ] * 5000
140142
141143 def time_iloc_dups (self ):
142- self .df3 .iloc [self .idx_dupe ]
144+ self .df_dup .iloc [self .idx_dupe ]
143145
144146 def time_loc_dups (self ):
145- self .df3 .loc [self .idx_dupe ]
147+ self .df_dup .loc [self .idx_dupe ]
146148
147- def time_iloc_big (self ):
148- self .df_big .iloc [:100 , 0 ]
149+ def time_iloc (self ):
150+ self .df .iloc [:100 , 0 ]
149151
152+ def time_loc (self ):
153+ self .df .loc [:100 , 0 ]
150154
151- class IndexingMethods (object ):
152- # GH 13166
153- goal_time = 0.2
155+ def time_bool_indexer (self ):
156+ self .df [self .bool_indexer ]
154157
155- def setup (self ):
156- N = 100000
157- a = np .arange (N )
158- self .ind = Float64Index (a * 4.8000000418824129e-08 )
159158
160- self .s = Series (np .random .rand (N ))
161- self .ts = Series (np .random .rand (N ),
162- index = date_range ('2011-01-01' , freq = 'S' , periods = N ))
163- self .indexer = [True , False , True , True , False ] * 20000
159+ class Take (object ):
164160
165- def time_get_loc_float (self ):
166- self .ind .get_loc (0 )
161+ goal_time = 0.2
162+ params = ['int' , 'datetime' ]
163+ param_names = ['index' ]
167164
168- def time_take_dtindex (self ):
169- self .ts .take (self .indexer )
165+ def setup (self , index ):
166+ N = 100000
167+ indexes = {'int' : Int64Index (np .arange (N )),
168+ 'datetime' : date_range ('2011-01-01' , freq = 'S' , periods = N )}
169+ index = indexes [index ]
170+ self .s = Series (np .random .rand (N ), index = index )
171+ self .indexer = [True , False , True , True , False ] * 20000
170172
171- def time_take_intindex (self ):
173+ def time_take (self , index ):
172174 self .s .take (self .indexer )
173175
174176
@@ -177,11 +179,10 @@ class MultiIndexing(object):
177179 goal_time = 0.2
178180
179181 def setup (self ):
180- self . mi = MultiIndex .from_product ([range (1000 ), range (1000 )])
181- self .s = Series (np .random .randn (1000000 ), index = self . mi )
182+ mi = MultiIndex .from_product ([range (1000 ), range (1000 )])
183+ self .s = Series (np .random .randn (1000000 ), index = mi )
182184 self .df = DataFrame (self .s )
183185
184- # slicers
185186 n = 100000
186187 self .mdt = DataFrame ({'A' : np .random .choice (range (10000 , 45000 , 1000 ),
187188 n ),
@@ -191,68 +192,16 @@ def setup(self):
191192 'x' : np .random .choice (range (400 ), n ),
192193 'y' : np .random .choice (range (25 ), n )})
193194 self .idx = IndexSlice [20000 :30000 , 20 :30 , 35 :45 , 30000 :40000 ]
194- self .mdt2 = self .mdt .set_index (['A' , 'B' , 'C' , 'D' ]).sortlevel ()
195- self .miint = MultiIndex .from_product ([np .arange (1000 ),
196- np .arange (1000 )],
197- names = ['one' , 'two' ])
198- self .obj_index = np .array ([(0 , 10 ), (0 , 11 ), (0 , 12 ),
199- (0 , 13 ), (0 , 14 ), (0 , 15 ),
200- (0 , 16 ), (0 , 17 ), (0 , 18 ),
201- (0 , 19 )], dtype = object )
202-
203- self .mi_large = MultiIndex .from_product (
204- [np .arange (1000 ), np .arange (20 ), list (string .ascii_letters )],
205- names = ['one' , 'two' , 'three' ])
206- self .mi_med = MultiIndex .from_product (
207- [np .arange (1000 ), np .arange (10 ), list ('A' )],
208- names = ['one' , 'two' , 'three' ])
209- self .mi_small = MultiIndex .from_product (
210- [np .arange (100 ), list ('A' ), list ('A' )],
211- names = ['one' , 'two' , 'three' ])
212-
213- size = 65536
214- self .mi_unused_levels = pd .MultiIndex .from_arrays ([
215- rng .randint (0 , 8192 , size ),
216- rng .randint (0 , 1024 , size )])[rng .random .rand (size ) < 0.1 ]
217-
218- def time_series_xs_mi_ix (self ):
195+ self .mdt = self .mdt .set_index (['A' , 'B' , 'C' , 'D' ]).sort_index ()
196+
197+ def time_series_ix (self ):
219198 self .s .ix [999 ]
220199
221- def time_frame_xs_mi_ix (self ):
200+ def time_frame_ix (self ):
222201 self .df .ix [999 ]
223202
224- def time_multiindex_slicers (self ):
225- self .mdt2 .loc [self .idx , :]
226-
227- def time_multiindex_get_indexer (self ):
228- self .miint .get_indexer (self .obj_index )
229-
230- def time_multiindex_large_get_loc (self ):
231- self .mi_large .get_loc ((999 , 19 , 'Z' ))
232-
233- def time_multiindex_large_get_loc_warm (self ):
234- for _ in range (1000 ):
235- self .mi_large .get_loc ((999 , 19 , 'Z' ))
236-
237- def time_multiindex_med_get_loc (self ):
238- self .mi_med .get_loc ((999 , 9 , 'A' ))
239-
240- def time_multiindex_med_get_loc_warm (self ):
241- for _ in range (1000 ):
242- self .mi_med .get_loc ((999 , 9 , 'A' ))
243-
244- def time_multiindex_string_get_loc (self ):
245- self .mi_small .get_loc ((99 , 'A' , 'A' ))
246-
247- def time_multiindex_small_get_loc_warm (self ):
248- for _ in range (1000 ):
249- self .mi_small .get_loc ((99 , 'A' , 'A' ))
250-
251- def time_is_monotonic (self ):
252- self .miint .is_monotonic
253-
254- def time_remove_unused_levels (self ):
255- self .mi_unused_levels .remove_unused_levels ()
203+ def time_index_slice (self ):
204+ self .mdt .loc [self .idx , :]
256205
257206
258207class IntervalIndexing (object ):
@@ -307,20 +256,6 @@ def time_lookup_loc(self, s):
307256 s .loc
308257
309258
310- class BooleanRowSelect (object ):
311-
312- goal_time = 0.2
313-
314- def setup (self ):
315- N = 10000
316- self .df = DataFrame (np .random .randn (N , 100 ))
317- self .bool_arr = np .zeros (N , dtype = bool )
318- self .bool_arr [:1000 ] = True
319-
320- def time_frame_boolean_row_select (self ):
321- self .df [self .bool_arr ]
322-
323-
324259class GetItemSingleColumn (object ):
325260
326261 goal_time = 0.2
@@ -342,7 +277,7 @@ class AssignTimeseriesIndex(object):
342277
343278 def setup (self ):
344279 N = 100000
345- dx = date_range ('1/1/2000' , periods = N , freq = 'H' )
280+ idx = date_range ('1/1/2000' , periods = N , freq = 'H' )
346281 self .df = DataFrame (np .random .randn (N , 1 ), columns = ['A' ], index = idx )
347282
348283 def time_frame_assign_timeseries_index (self ):
0 commit comments