Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ tables
matplotlib
nltk
stemming==1.0
flask-httpauth
122 changes: 122 additions & 0 deletions analysis_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Tested on python3.6


import time
import csv
import numpy as np

import os
import sys
import pathlib
import logging

sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[0])+'/topic-analysis/plsa-service/plsa')

from flask import Flask, jsonify
from flask import make_response
from flask import request
from flask_httpauth import HTTPBasicAuth
auth = HTTPBasicAuth()

app = Flask(__name__)



# '/topic-analysis/api/v1.0/results'
@app.route('/topic-analysis/api/v1.0/results', methods=['GET'])
# @auth.login_required
def results():
    """Return the stored PLSA analysis results for a given run handle.

    Query params:
        handle: folder name identifying a previous analysis run (as
            returned by the PLSA service).

    Returns:
        200 with a JSON payload (docs_list, topics, topicByDocMatirx,
        topicProbabilities, wordByTopicConditional, logLikelihoods);
        400 if the request is malformed or the handle is missing/unsafe;
        500 if the result files cannot be read.
    """
    try:
        print('In generate_topics:', time.strftime("%c"))
        handle = request.args['handle']
        print("handle =", handle)

        # SECURITY: 'handle' comes from the query string and is joined
        # into a filesystem path below — reject anything that could
        # escape the parameters directory (path traversal).
        if '..' in handle or '/' in handle or '\\' in handle:
            raise ValueError('invalid handle')

    except Exception as e:
        logging.exception("message")
        return make_response(jsonify({'Error': 'Request was not fulfilled. Please try again.', "error_msg": str(e)}), 400)

    try:
        parameters_path = str(pathlib.Path(os.path.abspath('')).parents[0]) + '/appData/plsa/' + 'plsa-parameters/' + handle + '/'
        print(parameters_path)

        with open(parameters_path + 'plsa_topics.txt', 'r') as f:
            topics = f.read().splitlines()

        topic_by_doc = []
        word_by_topic_conditional = []

        # NOTE: 'matirx' is the spelling used by the file writer — keep it.
        with open(parameters_path + 'topic-by-doc-matirx.csv') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')

            # First row is a header: column 0 is a label, the rest are doc names.
            docs_list = next(csv_reader)[1:]

            for row in csv_reader:
                # BUG FIX: np.float was deprecated in NumPy 1.20 and removed
                # in 1.24; the builtin float is the documented replacement.
                topic_by_doc.append(list(np.array(row[1:]).astype(float)))

        with open(parameters_path + 'topic_probability_pz', 'r') as f:
            topic_probabilities = f.read().splitlines()

        topic_probabilities = list(np.array(topic_probabilities).astype(float))

        with open(parameters_path + 'word_by_topic_conditional.csv') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')

            # Last column is dropped (trailing delimiter in the writer).
            for row in csv_reader:
                word_by_topic_conditional.append(list(np.array(row[:-1]).astype(float)))

        with open(parameters_path + 'logL.txt', 'r') as f:
            logLikelihoods = f.read().splitlines()

        logLikelihoods = list(np.array(logLikelihoods).astype(float))

        # BUG FIX: the original function fell through here without a return,
        # so a successful read produced a None response and Flask raised.
        # The commented-out PLSAResponse construction shows this payload
        # was the intended result.
        return jsonify({
            'status': True,
            'message': 'success',
            'docs_list': docs_list,
            'topics': topics,
            'topicByDocMatirx': topic_by_doc,
            'topicProbabilities': topic_probabilities,
            'wordByTopicConditional': word_by_topic_conditional,
            'logLikelihoods': logLikelihoods,
        })

    except Exception as e:
        logging.exception("message")
        # NOTE: this path is tested: it returns the error message correctly.
        return make_response(jsonify({'Error': 'Request was not fulfilled. Please try again.', "error_msg": str(e)}), 500)







@app.errorhandler(404)
def not_found(error):
    """Serve unknown routes a JSON 404 body instead of Flask's HTML page."""
    timestamp = time.strftime("%c")
    print('In not_found:', timestamp)
    body = jsonify({'Error': 'Not found'})
    return make_response(body, 404)





__end__ = '__end__'



if __name__ == '__main__':



    # Debug mode is kept off for deployed runs; switch to the commented
    # line only for local development.
    # app.run(debug=True)
    app.run(debug=False)
34 changes: 29 additions & 5 deletions plsa-service/plsa/plsa_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import datetime
import time
import json
import logging

sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa-service/plsa')
sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa-service/preprocessing')
Expand Down Expand Up @@ -44,7 +45,11 @@ def __init__(self, docs,local=False):
self.PLSA_PARAMETERS_PATH = ''

# self.messages
# self.unique_folder_naming
self.unique_folder_naming = None
self.num_topics = None
self.topic_divider = None
self.max_iter = None
self.beta = None

def __del__(self):

Expand All @@ -57,7 +62,7 @@ def write_to_json(self):



self.unique_folder_naming = str(datetime.datetime.now()).replace(':','-').replace('.','-') + '^' + str(random.randint(100000000000, 999999999999)) + '/'
# self.unique_folder_naming = str(datetime.datetime.now()).replace(':','-').replace('.','-') + '^' + str(random.randint(100000000000, 999999999999)) + '/'
print(self.unique_folder_naming)

os.mkdir(self.extracted_folder+self.unique_folder_naming)
Expand Down Expand Up @@ -90,11 +95,20 @@ def generate_topics_json(self):


# Do cleansing on the data and turning it into a bag-of-words model

with open(self.plsa_parameters_path + self.unique_folder_naming+'status.txt','w') as f:
f.write('Preprocessing started.')

pclean.pre_pro()

with open(self.plsa_parameters_path + self.unique_folder_naming+'status.txt','w') as f:
f.write('Preprocessing finished. Topic analysis started.')

# Train using PLSA
pplsa.topic_divider = 0
pplsa.num_topics = 2
pplsa.topic_divider = self.topic_divider
pplsa.num_topics = self.num_topics
pplsa.maxiter2 = self.max_iter
pplsa.beta = self.beta
pplsa.folder = pclean.output_dir[:-1]
pplsa.dict_path = pclean.file_dict
pplsa.PLSA_PARAMETERS_PATH = self.plsa_parameters_path + self.unique_folder_naming
Expand All @@ -107,14 +121,24 @@ def generate_topics_json(self):
self.output_dir_stream = pclean.output_dir
self.file_dict_stream = pclean.file_dict

os.mkdir(pplsa.PLSA_PARAMETERS_PATH)

try:
os.mkdir(pplsa.PLSA_PARAMETERS_PATH)
except:
print('-----------------------Folder exists-------------------------')


pplsa.main()


end_time_1 = time.time()

print('Total training time took:',round((end_time_1 - start_time_1) / 60, 4))

with open(self.plsa_parameters_path + self.unique_folder_naming+'status.txt','w') as f:
f.write('Topic analysis finished.\n')
f.write(str(round((end_time_1 - start_time_1) / 60, 4)))




Expand Down
7 changes: 1 addition & 6 deletions service_spec/topic_analysis.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,7 @@ message PLSAResponse{

bool status = 1;
string message = 2;
repeated string docs_list = 3;
repeated string topics = 4;
repeated FloatRow topicByDocMatirx = 5;
repeated double topicProbabilities = 6;
repeated FloatRow wordByTopicConditional = 7;
repeated double logLikelihoods = 8;
string handle = 3;


}
Expand Down
17 changes: 3 additions & 14 deletions snet_test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ def csv_reader():


def try_plsa():
channel = grpc.insecure_channel('localhost:500')
channel = grpc.insecure_channel('localhost:5000')
# channel = grpc.insecure_channel('172.17.0.75:5001')
stub = topic_analysis_pb2_grpc.TopicAnalysisStub(channel)


plsa_request = topic_analysis_pb2.PLSARequest(docs=sample_data(),num_topics=3,maxiter=22,beta=1)
plsa_request = topic_analysis_pb2.PLSARequest(docs=sample_data(),num_topics=3,maxiter=50,beta=0.6)

resp = stub.PLSA(plsa_request)

Expand All @@ -72,18 +72,7 @@ def try_plsa():
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.message)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.docs_list)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.topics)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.topicByDocMatirx)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.topicProbabilities)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.wordByTopicConditional)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.logLikelihoods)
print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
print(resp.handle)



Expand Down
87 changes: 39 additions & 48 deletions topic_analysis_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import os
import csv
import numpy as np
import datetime
import random


SLEEP_TIME = 86400 # One day
Expand All @@ -19,15 +21,8 @@

print(sys.path)

# import example_plsa as pplsa
# import plsa as plsa1
# import cleansing as pclean
#
#
# import random
# import json
# import datetime
import plsa_wrapper
import threading

from service_spec import topic_analysis_pb2
from service_spec import topic_analysis_pb2_grpc
Expand Down Expand Up @@ -81,48 +76,12 @@ def PLSA(self,request,context):

try:

s = plsa_wrapper.PLSA_wrapper(docs)
s.write_to_json()
s.generate_topics_json()

with open(s.PLSA_PARAMETERS_PATH+'plsa_topics.txt','r') as f:
topics = f.read().splitlines()

topic_by_doc = []
word_by_topic_conditional = []
logLikelihoods = []
docs_list = []

with open(s.PLSA_PARAMETERS_PATH+'topic-by-doc-matirx.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')

docs_list = next(csv_reader)[1:]

for row in csv_reader:
topic_by_doc.append(topic_analysis_pb2.FloatRow(doubleValue=list((np.array(row[1:])).astype(np.float))))


with open(s.PLSA_PARAMETERS_PATH+'topic_probability_pz','r') as f:
topic_probabilities = f.read().splitlines()

topic_probabilities = list((np.array(topic_probabilities)).astype(np.float))


with open(s.PLSA_PARAMETERS_PATH+'word_by_topic_conditional.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')

for row in csv_reader:
word_by_topic_conditional.append(topic_analysis_pb2.FloatRow(doubleValue=list((np.array(row[:-1])).astype(np.float))))

with open(s.PLSA_PARAMETERS_PATH+'logL.txt','r') as f:
logLikelihoods = f.read().splitlines()

logLikelihoods = list((np.array(logLikelihoods)).astype(np.float))


resp = topic_analysis_pb2.PLSAResponse(status=True,message='success',docs_list=docs_list,topics=topics,topicByDocMatirx=topic_by_doc,topicProbabilities=topic_probabilities,wordByTopicConditional=word_by_topic_conditional,logLikelihoods=logLikelihoods)
unique_folder_naming = str(datetime.datetime.now()).replace(':', '-').replace('.', '-') + '^' + str(random.randint(100000000000, 999999999999)) + '/'

thread1 = threading.Thread(target=generate_topics_plsa, args=(docs,unique_folder_naming,num_topics,topic_divider,maxiter,beta))
thread1.start()

resp = topic_analysis_pb2.PLSAResponse(status=True, message='success', handle=unique_folder_naming[:-1])


print('status:',resp.status)
Expand All @@ -145,6 +104,38 @@ def PLSA(self,request,context):
return resp


def generate_topics_plsa(docs,unique_folder_naming,num_topics,topic_divider,maxiter,beta):
    """Run a full PLSA analysis in a background thread, recording progress.

    Writes 'status.txt' inside the run's parameter folder so callers
    polling with the handle can observe progress; on any failure the
    file is overwritten with 'Failed.' followed by the exception text.

    Args:
        docs: documents to analyse, passed through to PLSA_wrapper.
        unique_folder_naming: per-run folder name (with trailing '/')
            that doubles as the client-facing handle.
        num_topics, topic_divider, maxiter, beta: PLSA hyperparameters
            forwarded to the wrapper.
    """

    # Put try catch here and add status

    s = plsa_wrapper.PLSA_wrapper(docs)

    try:

        # Create the run folder first; raises if the handle already exists.
        os.mkdir(s.plsa_parameters_path+unique_folder_naming)

        # 1/0

        with open(s.plsa_parameters_path+unique_folder_naming+'status.txt','w') as f:
            f.write('Analysis started.')

        # Configure the wrapper with this run's parameters before kicking
        # off serialization and training.
        s.unique_folder_naming = unique_folder_naming
        s.num_topics = num_topics
        s.topic_divider = topic_divider
        s.max_iter = maxiter
        s.beta = beta
        s.write_to_json()
        s.generate_topics_json()

    except Exception as e:

        logging.exception("message")

        # Record the failure so clients polling this handle see it rather
        # than waiting forever on a run that died.
        with open(s.plsa_parameters_path+unique_folder_naming+'status.txt','w') as f:
            f.write('Failed.')
            f.write('\n')
            f.write(str(e))




Expand Down