|
1 | | -from .pandas_vb_common import * |
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
2 | 3 |
|
| 4 | +from .pandas_vb_common import setup # noqa |
3 | 5 |
|
4 | | -def _set_use_bottleneck_False(): |
5 | | - try: |
6 | | - pd.options.compute.use_bottleneck = False |
7 | | - except: |
8 | | - from pandas.core import nanops |
9 | | - nanops._USE_BOTTLENECK = False |
10 | 6 |
|
| 7 | +ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem', |
| 8 | + 'var'] |
11 | 9 |
|
12 | | -class FrameOps(object): |
13 | | - goal_time = 0.2 |
14 | | - |
15 | | - param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] |
16 | | - params = [['mean', 'sum', 'median'], |
17 | | - [True, False], |
18 | | - ['float', 'int'], |
19 | | - [0, 1]] |
20 | | - |
21 | | - def setup(self, op, use_bottleneck, dtype, axis): |
22 | | - if dtype == 'float': |
23 | | - self.df = DataFrame(np.random.randn(100000, 4)) |
24 | | - elif dtype == 'int': |
25 | | - self.df = DataFrame(np.random.randint(1000, size=(100000, 4))) |
26 | | - |
27 | | - if not use_bottleneck: |
28 | | - _set_use_bottleneck_False() |
29 | | - |
30 | | - self.func = getattr(self.df, op) |
31 | | - |
32 | | - def time_op(self, op, use_bottleneck, dtype, axis): |
33 | | - self.func(axis=axis) |
34 | 10 |
|
| 11 | +class FrameOps(object): |
35 | 12 |
|
36 | | -class stat_ops_level_frame_sum(object): |
37 | 13 | goal_time = 0.2 |
| 14 | + params = [ops, ['float', 'int'], [0, 1], [True, False]] |
| 15 | + param_names = ['op', 'dtype', 'axis', 'use_bottleneck'] |
38 | 16 |
|
39 | | - def setup(self): |
40 | | - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
41 | | - random.shuffle(self.index.values) |
42 | | - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
43 | | - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
44 | | - |
45 | | - def time_stat_ops_level_frame_sum(self): |
46 | | - self.df.sum(level=1) |
47 | | - |
48 | | - |
49 | | -class stat_ops_level_frame_sum_multiple(object): |
50 | | - goal_time = 0.2 |
| 17 | + def setup(self, op, dtype, axis, use_bottleneck): |
| 18 | + df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) |
| 19 | + try: |
| 20 | + pd.options.compute.use_bottleneck = use_bottleneck |
| 21 | + except: |
| 22 | + from pandas.core import nanops |
| 23 | + nanops._USE_BOTTLENECK = use_bottleneck |
| 24 | + self.df_func = getattr(df, op) |
51 | 25 |
|
52 | | - def setup(self): |
53 | | - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
54 | | - random.shuffle(self.index.values) |
55 | | - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
56 | | - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
| 26 | + def time_op(self, op, dtype, axis, use_bottleneck): |
| 27 | + self.df_func(axis=axis) |
57 | 28 |
|
58 | | - def time_stat_ops_level_frame_sum_multiple(self): |
59 | | - self.df.sum(level=[0, 1]) |
60 | 29 |
|
| 30 | +class FrameMultiIndexOps(object): |
61 | 31 |
|
62 | | -class stat_ops_level_series_sum(object): |
63 | 32 | goal_time = 0.2 |
| 33 | + params = ([0, 1, [0, 1]], ops) |
| 34 | + param_names = ['level', 'op'] |
64 | 35 |
|
65 | | - def setup(self): |
66 | | - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
67 | | - random.shuffle(self.index.values) |
68 | | - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
69 | | - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
| 36 | + def setup(self, level, op): |
| 37 | + levels = [np.arange(10), np.arange(100), np.arange(100)] |
| 38 | + labels = [np.arange(10).repeat(10000), |
| 39 | + np.tile(np.arange(100).repeat(100), 10), |
| 40 | + np.tile(np.tile(np.arange(100), 100), 10)] |
| 41 | + index = pd.MultiIndex(levels=levels, labels=labels) |
| 42 | + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) |
| 43 | + self.df_func = getattr(df, op) |
70 | 44 |
|
71 | | - def time_stat_ops_level_series_sum(self): |
72 | | - self.df[1].sum(level=1) |
| 45 | + def time_op(self, level, op): |
| 46 | + self.df_func(level=level) |
73 | 47 |
|
74 | 48 |
|
75 | | -class stat_ops_level_series_sum_multiple(object): |
76 | | - goal_time = 0.2 |
77 | | - |
78 | | - def setup(self): |
79 | | - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) |
80 | | - random.shuffle(self.index.values) |
81 | | - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) |
82 | | - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) |
83 | | - |
84 | | - def time_stat_ops_level_series_sum_multiple(self): |
85 | | - self.df[1].sum(level=[0, 1]) |
| 49 | +class SeriesOps(object): |
86 | 50 |
|
87 | | - |
88 | | -class stat_ops_series_std(object): |
89 | 51 | goal_time = 0.2 |
| 52 | + params = [ops, ['float', 'int'], [True, False]] |
| 53 | + param_names = ['op', 'dtype', 'use_bottleneck'] |
90 | 54 |
|
91 | | - def setup(self): |
92 | | - self.s = Series(np.random.randn(100000), index=np.arange(100000)) |
93 | | - self.s[::2] = np.nan |
94 | | - |
95 | | - def time_stat_ops_series_std(self): |
96 | | - self.s.std() |
| 55 | + def setup(self, op, dtype, use_bottleneck): |
| 56 | + s = pd.Series(np.random.randn(100000)).astype(dtype) |
| 57 | + try: |
| 58 | + pd.options.compute.use_bottleneck = use_bottleneck |
| 59 | + except: |
| 60 | + from pandas.core import nanops |
| 61 | + nanops._USE_BOTTLENECK = use_bottleneck |
| 62 | + self.s_func = getattr(s, op) |
97 | 63 |
|
| 64 | + def time_op(self, op, dtype, use_bottleneck): |
| 65 | + self.s_func() |
98 | 66 |
|
99 | | -class stats_corr_spearman(object): |
100 | | - goal_time = 0.2 |
101 | 67 |
|
102 | | - def setup(self): |
103 | | - self.df = DataFrame(np.random.randn(1000, 30)) |
| 68 | +class SeriesMultiIndexOps(object): |
104 | 69 |
|
105 | | - def time_stats_corr_spearman(self): |
106 | | - self.df.corr(method='spearman') |
107 | | - |
108 | | - |
109 | | -class stats_rank2d_axis0_average(object): |
110 | 70 | goal_time = 0.2 |
| 71 | + params = ([0, 1, [0, 1]], ops) |
| 72 | + param_names = ['level', 'op'] |
111 | 73 |
|
112 | | - def setup(self): |
113 | | - self.df = DataFrame(np.random.randn(5000, 50)) |
114 | | - |
115 | | - def time_stats_rank2d_axis0_average(self): |
116 | | - self.df.rank() |
| 74 | + def setup(self, level, op): |
| 75 | + levels = [np.arange(10), np.arange(100), np.arange(100)] |
| 76 | + labels = [np.arange(10).repeat(10000), |
| 77 | + np.tile(np.arange(100).repeat(100), 10), |
| 78 | + np.tile(np.tile(np.arange(100), 100), 10)] |
| 79 | + index = pd.MultiIndex(levels=levels, labels=labels) |
| 80 | + s = pd.Series(np.random.randn(len(index)), index=index) |
| 81 | + self.s_func = getattr(s, op) |
117 | 82 |
|
| 83 | + def time_op(self, level, op): |
| 84 | + self.s_func(level=level) |
118 | 85 |
|
119 | | -class stats_rank2d_axis1_average(object): |
120 | | - goal_time = 0.2 |
121 | | - |
122 | | - def setup(self): |
123 | | - self.df = DataFrame(np.random.randn(5000, 50)) |
124 | 86 |
|
125 | | - def time_stats_rank2d_axis1_average(self): |
126 | | - self.df.rank(1) |
| 87 | +class Rank(object): |
127 | 88 |
|
128 | | - |
129 | | -class stats_rank_average(object): |
130 | | - goal_time = 0.2 |
131 | | - |
132 | | - def setup(self): |
133 | | - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
134 | | - self.s = Series(self.values) |
135 | | - |
136 | | - def time_stats_rank_average(self): |
137 | | - self.s.rank() |
138 | | - |
139 | | - |
140 | | -class stats_rank_average_int(object): |
141 | | - goal_time = 0.2 |
142 | | - |
143 | | - def setup(self): |
144 | | - self.values = np.random.randint(0, 100000, size=200000) |
145 | | - self.s = Series(self.values) |
146 | | - |
147 | | - def time_stats_rank_average_int(self): |
148 | | - self.s.rank() |
149 | | - |
150 | | - |
151 | | -class stats_rank_pct_average(object): |
152 | 89 | goal_time = 0.2 |
| 90 | + params = [['DataFrame', 'Series'], [True, False]] |
| 91 | + param_names = ['constructor', 'pct'] |
153 | 92 |
|
154 | | - def setup(self): |
155 | | - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
156 | | - self.s = Series(self.values) |
| 93 | + def setup(self, constructor, pct): |
| 94 | + values = np.random.randn(10**5) |
| 95 | + self.data = getattr(pd, constructor)(values) |
157 | 96 |
|
158 | | - def time_stats_rank_pct_average(self): |
159 | | - self.s.rank(pct=True) |
| 97 | + def time_rank(self, constructor, pct): |
| 98 | + self.data.rank(pct=pct) |
160 | 99 |
|
| 100 | + def time_average_old(self, constructor, pct): |
| 101 | + self.data.rank(pct=pct) / len(self.data) |
161 | 102 |
|
162 | | -class stats_rank_pct_average_old(object): |
163 | | - goal_time = 0.2 |
164 | | - |
165 | | - def setup(self): |
166 | | - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) |
167 | | - self.s = Series(self.values) |
168 | | - |
169 | | - def time_stats_rank_pct_average_old(self): |
170 | | - (self.s.rank() / len(self.s)) |
171 | 103 |
|
| 104 | +class Correlation(object): |
172 | 105 |
|
173 | | -class stats_rolling_mean(object): |
174 | 106 | goal_time = 0.2 |
| 107 | + params = ['spearman', 'kendall', 'pearson'] |
| 108 | + param_names = ['method'] |
175 | 109 |
|
176 | | - def setup(self): |
177 | | - self.arr = np.random.randn(100000) |
178 | | - self.win = 100 |
179 | | - |
180 | | - def time_rolling_mean(self): |
181 | | - rolling_mean(self.arr, self.win) |
182 | | - |
183 | | - def time_rolling_median(self): |
184 | | - rolling_median(self.arr, self.win) |
185 | | - |
186 | | - def time_rolling_min(self): |
187 | | - rolling_min(self.arr, self.win) |
188 | | - |
189 | | - def time_rolling_max(self): |
190 | | - rolling_max(self.arr, self.win) |
191 | | - |
192 | | - def time_rolling_sum(self): |
193 | | - rolling_sum(self.arr, self.win) |
194 | | - |
195 | | - def time_rolling_std(self): |
196 | | - rolling_std(self.arr, self.win) |
197 | | - |
198 | | - def time_rolling_var(self): |
199 | | - rolling_var(self.arr, self.win) |
200 | | - |
201 | | - def time_rolling_skew(self): |
202 | | - rolling_skew(self.arr, self.win) |
| 110 | + def setup(self, method): |
| 111 | + self.df = pd.DataFrame(np.random.randn(1000, 30)) |
203 | 112 |
|
204 | | - def time_rolling_kurt(self): |
205 | | - rolling_kurt(self.arr, self.win) |
| 113 | + def time_corr(self, method): |
| 114 | + self.df.corr(method=method) |
0 commit comments