forked from madscientistjaidev/TemporalEventExtraction
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrainModel.py
More file actions
61 lines (49 loc) · 1.69 KB
/
trainModel.py
File metadata and controls
61 lines (49 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
import os
'''
This module trains CRF models with the Stanford CRF classifier from a list of training files.
'''
#Function to train model using files in directory
def train(list):
    """Retrain the Stanford NER CRF model from the given training files.

    Steps, in order:
      1. Rewrite ``stanford-ner-2015-12-09/train.prop`` via
         ``modifyProp_train(list)``.
      2. Delete ``stanford-ner-2015-12-09/ner-model.ser.gz`` if it exists.
      3. Run the Stanford ``CRFClassifier`` with ``-prop train.prop`` from
         inside the ``stanford-ner-2015-12-09`` directory.

    Args:
        list: Training file paths; forwarded unchanged to
            ``modifyProp_train``. The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        None. The exit status of the java command is not inspected.
    """
    # Modify the properties file so it lists the requested training files.
    modifyProp_train(list)

    # Delete a previously trained model, if any.
    if os.path.exists("stanford-ner-2015-12-09/ner-model.ser.gz"):
        deleteTrain_model("ner-model.ser.gz")

    # The java command refers to the jar, lib/ and train.prop by relative
    # path, so it has to run from inside the NER directory. Remember the
    # absolute starting directory and restore it in ``finally``: a relative
    # ``chdir("..")`` lands in the wrong place when the NER directory is a
    # symlink, and would be skipped entirely if the command step raised.
    start_dir = os.getcwd()
    os.chdir("stanford-ner-2015-12-09")
    try:
        # Start training the model with the properties file containing the
        # feature specification.
        os.system('java -cp "stanford-ner.jar:lib/*" -mx4g edu.stanford.nlp.ie.crf.CRFClassifier -prop train.prop')
    finally:
        os.chdir(start_dir)
#delete already trained model file
def deleteTrain_model(filename):
    """Delete ``filename`` from the ``stanford-ner-2015-12-09`` directory.

    Args:
        filename: Name of the file to remove, relative to the
            ``stanford-ner-2015-12-09`` directory.

    Raises:
        OSError: Propagated from ``os.remove`` (for example
            ``FileNotFoundError`` when the file does not exist).
    """
    target = "/".join(("stanford-ner-2015-12-09", filename))
    os.remove(target)
#modify properties file
def modifyProp_train(list):
    """Regenerate ``stanford-ner-2015-12-09/train.prop``.

    The new file consists of a ``trainFileList = ...`` line built from
    ``extract(list)`` followed by the unmodified contents of
    ``default.prop``. Both paths are resolved relative to the current
    working directory.

    Args:
        list: Training file paths; forwarded unchanged to ``extract``. The
            name shadows the builtin but is kept so existing callers are
            unaffected.

    Raises:
        OSError: If ``default.prop`` cannot be read or ``train.prop``
            cannot be written.
    """
    # Specify the output filename.
    filename = "stanford-ner-2015-12-09/train.prop"

    # Read the template first, and read-only: if default.prop is missing or
    # unreadable we fail before the existing train.prop has been touched.
    with open("default.prop", "r") as template:
        defaults = template.read()

    # Build the leading property line; extract() already terminates it with
    # a newline.
    train_file_line = "trainFileList = " + extract(list)

    # Mode 'w' creates the file or truncates an existing one, so no separate
    # os.remove() is needed (it raised FileNotFoundError on a first run when
    # train.prop did not exist yet).
    with open(filename, "w") as out:
        out.write(train_file_line + defaults)
#function to extract list
def extract(list):
    """Build the comma-separated ``trainFileList`` value for the given paths.

    Each element is split on ``"/"`` and its fourth component (index 3) is
    appended to the fixed prefix ``data/silver-col/inputCol/``. The rewritten
    paths are joined with commas and terminated by a newline.

    Args:
        list: Iterable of ``"/"``-separated path strings. Each must have at
            least four components. The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        The joined string ending in ``"\\n"``; just ``"\\n"`` for empty input.

    Raises:
        IndexError: If an element has fewer than four ``"/"``-separated
            components.
    """
    # NOTE(review): index 3 presumably selects the file name in paths shaped
    # like "a/b/c/<file>" -- confirm against the callers before changing it.
    rewritten = (
        "data/silver-col/inputCol/" + element.split("/")[3]
        for element in list
    )
    return ",".join(rewritten) + "\n"