forked from ewrfcas/Machine-Learning-Toolbox
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataPreprocessing.py
More file actions
31 lines (28 loc) · 1.3 KB
/
DataPreprocessing.py
File metadata and controls
31 lines (28 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import numpy as np
def MajorityAndMinority(y):
    """Identify the majority and minority class of a binary label array.

    Args:
        y: Array-like of class labels. Only the first two distinct labels
            (in ``np.unique`` sorted order) are compared, so the input is
            expected to contain exactly two classes.

    Returns:
        A tuple ``(majority_label, majority_num, minority_label,
        minority_num)``. When both classes are equally frequent, the first
        label in sorted order is reported as the majority. ``minority_num``
        is ``y.shape[0] - majority_num``; both counts are Python ints.

    Raises:
        IndexError: If ``y`` holds fewer than two distinct labels.
    """
    y = np.asarray(y)
    # A single pass yields the sorted labels together with their counts.
    labels, counts = np.unique(y, return_counts=True)
    first_num = int(counts[0])
    second_num = int(counts[1])

    if first_num >= second_num:
        majority_label, minority_label = labels[0], labels[1]
        majority_num = first_num
    else:
        majority_label, minority_label = labels[1], labels[0]
        majority_num = second_num

    # Everything that is not the majority is counted as the minority.
    minority_num = y.shape[0] - majority_num
    return majority_label, majority_num, minority_label, minority_num
# Binary classification only.
# ratio: majority' = (majority - minority) * ratio + minority
def RandomUnderSampling(X, y, ratio=0, random_seed=2, replace=False):
    """Randomly under-sample the majority class of a binary dataset.

    The number of majority rows kept is
    ``int(minority_num + (majority_num - minority_num) * ratio)``, so
    ``ratio=0`` keeps as many majority rows as there are minority rows and
    ``ratio=1`` draws ``majority_num`` rows.

    Args:
        X: 2-D NumPy array of samples, one row per sample.
        y: 1-D NumPy array of labels aligned with the rows of ``X``;
            expected to contain exactly two classes.
        ratio: Fraction of the majority surplus to keep.
        random_seed: Seed for the sampling generator.
        replace: Forwarded to ``choice``; whether majority rows may be
            drawn more than once.

    Returns:
        A tuple ``(final_X, final_y)``. ``final_X`` stacks all minority rows
        followed by the sampled majority rows. ``final_y`` is a float array
        (built from ``np.ones``) holding the matching labels.
    """
    # A private RandomState produces the same draws as seeding the global
    # legacy generator with random_seed, but leaves np.random's state alone
    # so the caller's own random stream is not reset by this call.
    rng = np.random.RandomState(random_seed)

    majority_label, majority_num, minority_label, minority_num = MajorityAndMinority(y)
    majority_X = X[y == majority_label, :]
    minority_X = X[y == minority_label, :]

    sample_size = int(minority_num + (majority_num - minority_num) * ratio)
    random_index = rng.choice(np.arange(majority_num), sample_size, replace=replace)
    majority_X = majority_X[random_index, :]

    final_X = np.concatenate((minority_X, majority_X), axis=0)

    # Labels follow the row order of final_X: minority first, then majority.
    final_y = np.ones(final_X.shape[0])
    final_y[:minority_num] = final_y[:minority_num] * minority_label
    final_y[minority_num:] = final_y[minority_num:] * majority_label
    return final_X, final_y