|
6 | 6 | import numpy as np |
7 | 7 |
|
8 | 8 |
|
def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
    """Calculate the score at the given `per` percentile of the sequence `a`.

    For example, the score at `per=50` is the median. If the desired quantile
    lies between two data points, we interpolate between them, according to
    the value of `interpolation_method`. If the parameter `limit` is
    provided, it should be a tuple (lower, upper) of two values. Values of
    `a` outside this (closed) interval will be ignored.

    The `interpolation_method` parameter supports three values, namely
    `fraction` (default), `lower` and `higher`. Interpolation is done only
    if the desired quantile lies between two data points `i` and `j`. For
    `fraction`, the result is an interpolated value between `i` and `j`;
    for `lower`, the result is `i`, for `higher` the result is `j`.

    Parameters
    ----------
    a : array_like
        Values from which to extract score. The data is sorted along its
        first axis before the percentile is looked up.
    per : scalar
        Percentile at which to extract score. Must lie in ``[0, 100]``.
    limit : tuple, optional
        Tuple of two scalars, the lower and upper limits within which to
        compute the percentile. An empty tuple (the default) disables
        filtering.
    interpolation_method : {'fraction', 'lower', 'higher'}, optional
        This optional parameter specifies the interpolation method to use,
        when the desired quantile lies between two data points `i` and `j`:

        - fraction: `i + (j - i)*fraction`, where `fraction` is the
          fractional part of the index surrounded by `i` and `j`.
        - lower: `i`.
        - higher: `j`.

    Returns
    -------
    score : float
        Score at percentile. ``nan`` is returned when there is no data to
        compute a percentile from (empty `a`, or every value excluded by
        `limit`).

    Raises
    ------
    ValueError
        If `per` is outside ``[0, 100]``, or if interpolation is required
        and `interpolation_method` is not one of the supported values.

    See Also
    --------
    percentileofscore

    Examples
    --------
    >>> from scipy import stats
    >>> a = np.arange(100)
    >>> stats.scoreatpercentile(a, 50)
    49.5

    """
    # TODO: this should be a simple wrapper around a well-written quantile
    # function. GNU R provides 9 quantile algorithms (!), with differing
    # behaviour at, for example, discontinuities.

    # A negative percentile would otherwise wrap around to the wrong end of
    # the sorted data, and one above 100 would index past the end.
    if per < 0 or per > 100:
        raise ValueError("percentile must be in the range [0, 100]")

    values = np.sort(a, axis=0)
    if limit:
        values = values[(limit[0] <= values) & (values <= limit[1])]

    n_values = values.shape[0]
    if n_values == 0:
        # Nothing to take a percentile of.
        return np.nan

    # Fractional position of the requested percentile in the sorted data.
    idx = per / 100. * (n_values - 1)

    # NumPy only accepts integer indices, so convert explicitly instead of
    # indexing with the float `idx` (or the float results of floor/ceil).
    lower = int(np.floor(idx))
    if idx == lower:
        # The percentile falls exactly on a data point: no interpolation.
        return values[lower]

    # `idx` is strictly between `lower` and `lower + 1`, and since
    # idx < n_values - 1 here, `upper` is always a valid index.
    upper = lower + 1
    if interpolation_method == 'fraction':
        return values[lower] + (values[upper] - values[lower]) * (idx % 1)
    if interpolation_method == 'lower':
        return values[lower]
    if interpolation_method == 'higher':
        return values[upper]
    raise ValueError("interpolation_method can only be 'fraction', "
                     "'lower' or 'higher'")
82 | | - |
83 | | - |
def _interpolate(a, b, fraction):
    """Linearly interpolate between `a` and `b`.

    `fraction` is expected to lie between 0 and 1; 0 yields `a` and
    1 yields `b`.
    """
    span = b - a
    return a + span * fraction
89 | | - |
90 | | - |
91 | 9 | def rankdata(a): |
92 | 10 | """ |
93 | 11 | Ranks the data, dealing with ties appropriately. |
|
0 commit comments