forked from madscientistjaidev/TemporalEventExtraction
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtestRun.py
More file actions
69 lines (59 loc) · 2.24 KB
/
testRun.py
File metadata and controls
69 lines (59 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
import os
import glob
'''
This module tests a trained Stanford CRF classifier: it classifies each test text file with the saved model and then scores the model against the annotated test data.
'''
# Paths used by the test run. The java commands in run() and stat() are
# executed from inside "stanford-ner-2015-12-09", so paths handed to them are
# relative to that directory.
# Directory passed to extractWriteNames() by test(); relative to the script's
# own working directory, hence the NER directory prefix.
filepath = "stanford-ner-2015-12-09/data/TE3-platinum-test-text"
# Glob pattern for the test text files; not referenced by the code visible in
# this file.
Testdir = "data/TE3-platinum-test-text/*.txt"
# Passed (with a trailing "/") as the -testFile argument in stat().
directory = "data/te3-platinum-col/inputCol"
# Output directory for the .tsv files written by run(); test() creates it
# under the NER directory if it does not exist.
targetdirectory = "data/TE3-platinum-test-output-final"
#Classify a single text file with the trained model and write the result as a .tsv file
def run(fileName):
    """Classify one text file with the trained CRF model and save the TSV output.

    The classifier is launched with "stanford-ner-2015-12-09" as its working
    directory, so ``fileName`` is interpreted relative to that directory. Its
    standard output is written to ``targetdirectory/<name>`` inside the NER
    directory, where ``<name>`` is ``tsvParse(fileName)``.

    Args:
        fileName: path of the text file to classify, relative to the NER
            directory (e.g. "data/TE3-platinum-test-text/foo.txt").

    Raises:
        IOError/OSError: if the output directory does not exist, or if the
            NER directory or the ``java`` executable cannot be found.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import subprocess

    ner_dir = "stanford-ner-2015-12-09"
    # Resolve the output name before anything else so that a malformed
    # fileName fails early, with no side effects left behind.
    out_path = os.path.join(ner_dir, targetdirectory, tsvParse(fileName))
    command = [
        'java', '-mx4g',
        '-cp', 'stanford-ner.jar:lib/*',
        'edu.stanford.nlp.ie.crf.CRFClassifier',
        '-loadClassifier', 'ner-model.ser.gz',
        '-textFile', fileName,
        '-outputFormat', 'tsv',
    ]
    # An argument list plus cwd= replaces os.chdir()/os.system(): the file
    # name is never parsed by a shell (spaces and metacharacters are safe),
    # and the process-wide working directory is never changed, so it cannot
    # be left pointing at the NER directory when something raises.
    with open(out_path, 'wb') as out_file:
        subprocess.call(command, cwd=ner_dir, stdout=out_file)
#Test Procedure
def test():
    """Classify every test text file with the trained model, then score it.

    When stanford-ner-2015-12-09/ner-model.ser.gz is missing, prints
    "FAILED :NO TRAINING MODEL FOUND" and does nothing else. Otherwise it
    collects the file names via extractWriteNames(), makes sure the output
    directory exists, calls run() on each name and finally calls stat().
    """
    # Guard clause: without a serialized model there is nothing to test.
    if not os.path.exists("stanford-ner-2015-12-09/ner-model.ser.gz"):
        print ("FAILED :NO TRAINING MODEL FOUND")
        return

    # Names of the .txt files to classify
    text_files = extractWriteNames(filepath)

    # The output directory lives inside the NER directory
    output_dir = "stanford-ner-2015-12-09/"+targetdirectory
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("TESTING")
    for text_file in text_files:
        run(text_file)
    # NOTE(review): assumed to run once, after all files are classified - confirm.
    stat()
#Test Data Statistics - Compare against Annotated Test Data
def stat():
    """Run the classifier in -testFile mode and save what it prints.

    The command is executed through the shell from inside
    "stanford-ner-2015-12-09". It passes ``directory`` followed by "/" as the
    -testFile argument and redirects standard output to
    data/finalOutputAgainstActual.txt (relative to the NER directory).
    Afterwards the working directory is moved back up one level.
    """
    # Adjacent literals are joined first, then %s receives the test path.
    score_command = (
        'java -mx4g -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier'
        ' -loadClassifier ner-model.ser.gz'
        ' -testFile %s/ > data/finalOutputAgainstActual.txt' % directory
    )
    os.chdir("stanford-ner-2015-12-09")
    os.system(score_command)
    os.chdir("..")
#Extract Filenames
def extractWriteNames(filepath):
    """Collect the .txt test files and convert their paths with extract().

    Globs stanford-ner-2015-12-09/data/TE3-platinum-test-text/*.txt and hands
    the matches to extract().

    Args:
        filepath: accepted for the caller's sake but never read here; the
            glob pattern is hard-coded.

    Returns:
        The list produced by extract() for the matched paths.
    """
    return extract(glob.glob("stanford-ner-2015-12-09/data/TE3-platinum-test-text/*.txt"))
#function to extract list
def extract(list):
    """Map test-file paths to paths relative to the NER directory.

    Each result is "data/TE3-platinum-test-text/" followed by the file name
    (last path component) of the corresponding input.

    Args:
        list: iterable of path strings. The name shadows the builtin but is
            kept because it is part of the existing signature.

    Returns:
        A new list of strings, one per input path, in the same order.
    """
    # basename() replaces the former split("/")[3]: the result no longer
    # depends on how many directories precede the file name, so shallower
    # paths no longer raise IndexError and deeper ones no longer yield a
    # directory component instead of the file name.
    return ["data/TE3-platinum-test-text/" + os.path.basename(element)
            for element in list]
def tsvParse(filename):
    """Return the .tsv output name for a text file path.

    Takes the last path component of ``filename``, keeps the part before its
    first "." and appends ".tsv".

    Args:
        filename: path string such as "data/TE3-platinum-test-text/foo.txt".

    Returns:
        The output file name, e.g. "foo.tsv" (no directory part).
    """
    # basename() replaces the former split("/")[2], which raised IndexError
    # for paths with fewer than three components and picked a directory name
    # for deeper ones.
    base_name = os.path.basename(filename)
    # Cutting at the first dot is kept on purpose so existing output names
    # do not change.
    stem = base_name.split(".")[0]
    return stem + ".tsv"