-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathData_completion.R
More file actions
55 lines (46 loc) · 1.32 KB
/
Data_completion.R
File metadata and controls
55 lines (46 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#source("E:/code/R/2016 regular analysis/new.R")
# Load the training samples and the companion table that supplies the
# per-variable fill values.
new <- read.csv(
  file = "E:\\code\\R\\2016 regular analysis\\train.csv",
  header = TRUE
)
jun <- read.csv(
  file = "E:\\code\\R\\2016 regular analysis\\introduction.csv",
  header = TRUE
)

# Dimensions of the training data, echoed for a quick sanity check.
row <- nrow(new)
column <- ncol(new)
row
column

# Column 4 of the companion table is used further down as the replacement
# value for missing cells, indexed by the column position in `new`.
# NOTE(review): this presumes introduction.csv has one row per train.csv
# column, in the same order -- confirm.
average <- jun[[4]]
# Column 1 is exported unchanged at the end of the script
# (presumably the variable names -- confirm).
variable <- jun[[1]]
# Missing-value handling: fill missing cells with the per-variable average and
# drop samples that have too many missing variables.
#
# Two defects of the earlier cell-by-cell version are fixed here:
#   * Missing cells used to be rewritten to 0 and then detected with `== 0`,
#     so genuine zeros in the data were overwritten with the average and
#     counted as missing. The NA mask is now taken from the raw data.
#   * Rows were deleted while the loop index kept advancing up to the original
#     row count, which skipped the row following every deletion and finally
#     indexed past the end of the data (`if (NA == 0)` is an error).
missing_mask <- is.na(new)  # logical matrix, TRUE where a cell is missing

# Echo the loaded data and its class for inspection.
new
class(new)

# Empty five-column template for the cleaned output table; it is filled from
# `new` after the cleaning step.
newo <- data.frame(
  RA = 0, GENDER = 0, DRWITHGL = 0, NRWITHGL = 0, RAXISLEG = 0
)
newo <- newo[-1, ]

# Number of missing variables per sample, counted before anything is filled.
missing_per_row <- rowSums(missing_mask)

# Replace every missing cell with the average of its variable.
# NOTE(review): `average[j]` is matched to column j by position -- confirm
# that the averages are listed in the same order as the columns of `new`.
for (j in seq_len(column)) {
  gaps <- missing_mask[, j]
  if (any(gaps)) {
    new[gaps, j] <- average[j]
  }
}

# A sample is invalid when more than half of its variables were missing.
invalid <- unname(which(missing_per_row > column / 2))

# Report each removed sample by its row position in the loaded data.
for (dele in invalid) {
  cat("delete the row of ", dele, "\n")
  print(dele)
}

# Guard the empty case: `new[-integer(0), ]` would select zero rows.
if (length(invalid) > 0) {
  new <- new[-invalid, , drop = FALSE]
}
print(new)
# Rebuild the output table from the cleaned training data, i.e. without the
# invalid samples (more than half of the variables missing); the original
# author noted that 4 such samples were removed for their data set.
#
# The table is copied in one step instead of cell by cell: the element-wise
# loops iterated over `1:length(...)`, which runs with indices 1 and 0 when no
# rows are left, and every single-cell assignment copied the data frame.
template_names <- names(newo)
newo <- new
# Renumber the rows 1..n, as the element-wise copy did.
rownames(newo) <- NULL
# Keep the column names of the `newo` template for the columns it defines;
# any further columns keep their names from the training data.
shared <- seq_len(min(length(template_names), ncol(newo)))
names(newo)[shared] <- template_names[shared]
print(newo)
# Export the cleaned training data and the variable list as CSV files.
write.csv(
  x = newo,
  file = "E:\\code\\R\\2016 regular analysis\\trainNew.csv"
)
write.csv(
  x = variable,
  file = "E:\\code\\R\\2016 regular analysis\\variable.csv"
)