diff --git a/code/parsing-engine/build.gradle b/code/parsing-engine/build.gradle index 0a9c572ce5..3be30d04b5 100644 --- a/code/parsing-engine/build.gradle +++ b/code/parsing-engine/build.gradle @@ -14,9 +14,18 @@ plugins { repositories { // Use JCenter for resolving dependencies. jcenter() + mavenCentral() } dependencies { + implementation group: 'org.apache.lucene', name: 'lucene-core', version: '8.1.0' + implementation group: 'org.apache.lucene', name: 'lucene-queryparser', version: '8.1.0' + implementation group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: '8.1.0' + implementation group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1' + + + + // Use JUnit Jupiter API for testing. implementation 'commons-lang:commons-lang:2.6' implementation 'edu.stanford.nlp:stanford-corenlp:4.3.1' diff --git a/code/parsing-engine/src/main/java/edu/illinois/phantom/analysisengine/ScoringEngine.java b/code/parsing-engine/src/main/java/edu/illinois/phantom/analysisengine/ScoringEngine.java new file mode 100644 index 0000000000..1cb4265bbd --- /dev/null +++ b/code/parsing-engine/src/main/java/edu/illinois/phantom/analysisengine/ScoringEngine.java @@ -0,0 +1,192 @@ +package edu.illinois.phantom.analysisengine; + +import edu.illinois.phantom.model.UserQuery; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.*; +import org.apache.lucene.store.FSDirectory; +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.*; +import java.util.logging.Logger; + + +public class ScoringEngine { + private static final Logger LOGGER = Logger.getLogger(ScoringEngine.class.getName()); + + private static Analyzer analyzer = new SimpleAnalyzer(); + private IndexWriter writer; + private ArrayList queue = new ArrayList<>(); + + ScoringEngine() throws IOException { + FSDirectory dir = FSDirectory.open(Paths.get(getClass().getResource("/CORPUS").getFile())); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + } + + public void indexFilesDirectory() throws IOException { + + addFiles(new File(getClass().getResource("/CORPUS").getFile())); + + + queue.forEach(file -> { + try { + + FileReader fr = new FileReader(file); + Object obj = new JSONParser().parse(fr); + JSONObject jo = (JSONObject) obj; + String location = (String) jo.get("location"); + JSONArray ja = (JSONArray) jo.get("skills"); + String allSkills = " "; + Iterator itr2 = ja.iterator(); + Document document = new Document(); + document.add(new StringField("path", file.getPath(), Field.Store.YES)); + document.add(new StringField("filename", file.getName(), Field.Store.YES)); + document.add(new StringField("location", location, Field.Store.YES)); + while (itr2.hasNext()) { + Iterator itr1 = ((Map) itr2.next()).entrySet().iterator(); + int duration=0; + String skill = null; + while (itr1.hasNext()) { + Map.Entry pair = itr1.next(); + if(pair.getKey().toString().equalsIgnoreCase("duration")) { + duration = Integer.parseInt(pair.getValue().toString()); + } + if(pair.getKey().toString().equalsIgnoreCase("skill")) { + skill = pair.getValue().toString(); + } + + } + allSkills = allSkills + skill; + String skills = skill.toUpperCase()+"_FIELD"; + //document.add(new LegacyIntField(skills, duration ,Field.Store.YES)); + + document.add(new IntPoint(skills, duration)); + document.add(new StoredField(skills,duration)); + document.add(new StringField("allSkills", allSkills, Field.Store.YES)); + writer.addDocument(document); + } + fr.close(); + } + catch (Exception e) { + e.printStackTrace(); + } + + }); + + queue.clear(); + writer.commit(); + writer.close(); + + + } + + public Set searchQuery(List userQuery) throws IOException { + IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(getClass().getResource("/CORPUS").getFile()))); + IndexSearcher searcher = new IndexSearcher(reader); + +// Query query = IntRange.newWithinQuery("Java_FIELD",new int[] {5},new int[] {Integer.MAX_VALUE}); +// Query query2 = IntRange.newWithinQuery("Kafka_FIELD", new int[] {1},new int[] {Integer.MAX_VALUE}); +// Query query3 = IntRange.newWithinQuery("Angular_FIELD", new int[] {10},new int[] {Integer.MAX_VALUE}); + + /*Query query = NumericRangeQuery.newIntRange("Java_FIELD",5,50,true,true); + Query query2 = NumericRangeQuery.newIntRange("Kafka_FIELD", 1,30,true,true); + Query query3 = NumericRangeQuery.newIntRange("Angular_FIELD", 10,100,true,true); + query.setBoost((float) 2.0); + + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(query, BooleanClause.Occur.SHOULD); + booleanQuery.add(query2, BooleanClause.Occur.SHOULD); + booleanQuery.add(query3, BooleanClause.Occur.SHOULD); + +*/ + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + userQuery.forEach(inputQuery -> { + Query query; + if(inputQuery.isMandatorySkill()) { + query = new BoostQuery(IntPoint.newRangeQuery(inputQuery.getSkill(), inputQuery.getMinExperience() + , Integer.MAX_VALUE), (float) inputQuery.getMinExperience()); + } + else { + query = IntPoint.newRangeQuery(inputQuery.getSkill(), inputQuery.getMinExperience() + , Integer.MAX_VALUE); + } + + builder.add(query,BooleanClause.Occur.SHOULD); + }); + + BooleanQuery booleanQuery = builder.build(); + + TopScoreDocCollector collector = null; + HashSet resultset = new LinkedHashSet<>(); + + try { + collector = TopScoreDocCollector.create(100,Integer.MAX_VALUE); //Scoring for all the documents. + searcher.search(booleanQuery, collector); + ScoreDoc[] hits = collector.topDocs().scoreDocs; + + for (int i = 0; i < Math.min(50, hits.length); ++i) { + int docId = hits[i].doc; + Document d = searcher.doc(docId); + resultset.add(d.get("location")); + String location = d.get("location"); + System.out.println("File location--->>>" + location + " Score-->>>" + hits[i].score); + } + } + catch (Exception e) { + e.printStackTrace(); + } + System.out.println("Result Document-->>" + resultset); + return resultset; + } + + private void addFiles(File file) { + + if (!file.exists()) { + System.out.println(file + " does not exist."); + } + if (file.isDirectory()) { + for (File f : file.listFiles()) { + addFiles(f); + } + } else { + String filename = file.getName().toLowerCase(); + // =================================================== + // Only index text files + // =================================================== + if (filename.endsWith(".json")) { + queue.add(file); + } else { + System.out.println("Skipped " + filename); + } + } + } + + public static void main(String args[]) throws IOException { + ScoringEngine scoringEngine = new ScoringEngine(); + scoringEngine.indexFilesDirectory(); + //TODO: Remove Later + UserQuery query1 = new UserQuery("JAVA",15,true); + UserQuery query2 = new UserQuery("KAFKA",5,true); + UserQuery query3 = new UserQuery("ANGULAR",2,false); + + ArrayList userQueryArrayList = new ArrayList<>(); + userQueryArrayList.add(query1); + userQueryArrayList.add(query2); + userQueryArrayList.add(query3); + + scoringEngine.searchQuery(userQueryArrayList); + } + +} diff --git a/code/parsing-engine/src/main/java/edu/illinois/phantom/model/UserQuery.java b/code/parsing-engine/src/main/java/edu/illinois/phantom/model/UserQuery.java new file mode 100644 index 0000000000..cae5d2165d --- /dev/null +++ b/code/parsing-engine/src/main/java/edu/illinois/phantom/model/UserQuery.java @@ -0,0 +1,42 @@ +package edu.illinois.phantom.model; + +import lombok.Builder; +import lombok.ToString; + +@Builder +@ToString +public class UserQuery { + private String skill; + private int minExperience; + boolean mandatorySkill; + + public UserQuery(String skill, int minExperience, boolean mandatorySkill) { + this.skill = skill.toUpperCase()+"_FIELD"; + this.minExperience = minExperience; + this.mandatorySkill = mandatorySkill; + } + + public String getSkill() { + return skill; + } + + public void setSkill(String skill) { + this.skill = skill.toUpperCase()+"_FIELD";; + } + + public int getMinExperience() { + return minExperience; + } + + public void setMinExperience(int minExperience) { + this.minExperience = minExperience; + } + + public boolean isMandatorySkill() { + return mandatorySkill; + } + + public void setMandatorySkill(boolean mandatorySkill) { + this.mandatorySkill = mandatorySkill; + } +} diff --git a/code/parsing-engine/src/main/resources/CORPUS/REsume_Java.txt b/code/parsing-engine/src/main/resources/CORPUS/REsume_Java.txt new file mode 100644 index 0000000000..b544d529d4 --- /dev/null +++ b/code/parsing-engine/src/main/resources/CORPUS/REsume_Java.txt @@ -0,0 +1,5 @@ +Java 15 +Javascript 2 +Spark 5 +ExtJS 1 +Angular 1 \ No newline at end of file diff --git a/code/parsing-engine/src/main/resources/CORPUS/Resume_UI.txt b/code/parsing-engine/src/main/resources/CORPUS/Resume_UI.txt new file mode 100644 index 0000000000..765a1f2a91 --- /dev/null +++ b/code/parsing-engine/src/main/resources/CORPUS/Resume_UI.txt @@ -0,0 +1,4 @@ +ExtJS 5 +Angular 10 +Java 2 +Javascript 2 \ No newline at end of file