singnet · edyirdaw · Feb 12, 2019 · Feb 7, 2019 · Feb 8, 2019 · Feb 8, 2019
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,7 @@
 .idea/
 *~
+plsa-service/__pycache__/
+plsa-service/plsa/_plsa.c
+service_spec/__pycache__/
+service_spec/topic_analysis_pb2.py
+service_spec/topic_analysis_pb2_grpc.py
diff --git a/plsa/__init__.py → plsa-service/__init__.py b/plsa/__init__.py → plsa-service/__init__.py
diff --git a/plsa/plsa/__init__.py → plsa-service/plsa/__init__.py b/plsa/plsa/__init__.py → plsa-service/plsa/__init__.py
diff --git a/plsa/plsa/_plsa.pyx → plsa-service/plsa/_plsa.pyx b/plsa/plsa/_plsa.pyx → plsa-service/plsa/_plsa.pyx
diff --git a/plsa/plsa/example_plsa.py → plsa-service/plsa/example_plsa.py b/plsa/plsa/example_plsa.py → plsa-service/plsa/example_plsa.py
@@ -9,14 +9,16 @@
 import logging
 import numpy as np
 
+
 # import taskmanager as tm
 import pandas as pd
 from tfidf.preprocessing import read_files, preprocess_documents, read_json, json_files_list
 # from tfidf.porter import PorterStemmer
 from tfidf.tfidf import *
 # from tfidf.preprocessing import file_list, empty_file_list
 
-from plsa import pLSA
+# from plsa import pLSA
+import plsa as plsa1
 import porter_dictionary
 
 # s_file_list = []
@@ -286,7 +288,7 @@ def train(data, maxiter=500, debug=True):
 
     td, vocab = data
     # td = td[:,:-1]
-    plsa = pLSA()
+    plsa = plsa1.pLSA()
     plsa.debug = debug
     plsa.logL_pic = logL_pic
     # model=plsa.train(td, num_topics, maxiter)
@@ -386,22 +388,22 @@ def train(data, maxiter=500, debug=True):
 def average_train(data, maxiter=500, debug=True):
     td, idf, vocab = data
     td = td[:,:-1]
-    plsa = pLSA()
+    plsa = plsa1.pLSA()
     plsa.debug = debug
     return plsa.average_train(10)(td, 10, maxiter)
 
 # @tm.task(feat, train, int, int)
 def folding_in(data, model, maxiter=30, debug=True):
     td, idf, vocab = data
     d = td[:,-1]
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     plsa.debug = debug
     print (plsa.folding_in(d, maxiter))
 
 # @tm.nocache
 # @tm.task(train)
 def document_topics(model):
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     for i in  plsa.document_topics():
        print (i)
        # file_txt1.write(str(i))
@@ -411,13 +413,13 @@ def document_topics(model):
 # @tm.nocache
 # @tm.task(train)
 def document_cluster(model):
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     print (plsa.document_cluster())
 
 # @tm.nocache
 # @tm.task(train)
 def word_topics(model):
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     for i in  plsa.word_topics():
        print (i)
        # file_txt1.write(str(i))
@@ -427,13 +429,13 @@ def word_topics(model):
 # @tm.nocache
 # @tm.task(train)
 def word_cluster(model):
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     print (plsa.word_cluster())
 
 # @tm.nocache
 # @tm.task(train)
 def unigram_smoothing(model):
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     print (plsa.unigram_smoothing())
 
 # @tm.nocache
@@ -445,7 +447,7 @@ def topic_labels(data, model, N=50):
     port_dict.load_dict(dict_path)
     # print port_dict.dictionary
     td, vocab = data
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     inv_vocab = inverse_vocab(vocab)
     dict_vocab=[]
     # vocab_list=[x for x in inv_vocab[1]]
@@ -470,7 +472,7 @@ def topic_labels(data, model, N=50):
 # @tm.task(feat, train)
 def global_weights(data, model):
     td, idf, vocab = data
-    plsa = pLSA(model)
+    plsa = plsa1.pLSA(model)
     print (plsa.global_weights(idf))
 
 def main():
@@ -500,5 +502,7 @@ def main():
     # word_cluster(model)
     # word_topics(model)
     # document_topics(model)
+
+
 if __name__ == "__main__":
     main()
diff --git a/plsa/plsa/plsa.py → plsa-service/plsa/plsa.py b/plsa/plsa/plsa.py → plsa-service/plsa/plsa.py
@@ -31,7 +31,6 @@
     Z: number of topics
 
 """
-
 import psutil
 import numpy as np
 import matplotlib

diff --git a/plsa/plsa/plsa_train.c → plsa-service/plsa/plsa_train.c b/plsa/plsa/plsa_train.c → plsa-service/plsa/plsa_train.c
diff --git a/plsa/plsa/plsa_wrapper.py → plsa-service/plsa/plsa_wrapper.py b/plsa/plsa/plsa_wrapper.py → plsa-service/plsa/plsa_wrapper.py
@@ -13,18 +13,25 @@
 import time
 import json
 
-sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa/plsa')
-sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa/preprocessing')
+sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa-service/plsa')
+sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[1])+'/plsa-service/preprocessing')
+sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[0])+'/topic-analysis/plsa-service/plsa')
+sys.path.append(str(pathlib.Path(os.path.abspath('')).parents[0])+'/topic-analysis/plsa-service/preprocessing')
+
 
 import example_plsa as pplsa
 import cleansing as pclean
 
-class TopicAnalysis:
+class PLSA_wrapper:
 
-    def __init__(self, docs):
+    def __init__(self, docs,local=False):
 
         self.docs = docs
-        self.root_path = str(pathlib.Path(os.path.abspath('')).parents[2]) + '/appData/plsa/'
+        if not local:
+            self.root_path = str(pathlib.Path(os.path.abspath('')).parents[0]) + '/appData/plsa/'
+        else:
+            self.root_path = str(pathlib.Path(os.path.abspath('')).parents[2]) + '/appData/plsa/'
+        print('>>>>>>>>>>>>>self.root_path>>>>>>>>>>>')
         print(self.root_path)
         self.extracted_folder = self.root_path + 'extracted/'
         self.file_dict = self.root_path + 'dict/'
@@ -114,7 +121,7 @@ def generate_topics_json(self):
 
 def run_plsa():
 
-    path = ''
+    path = str(pathlib.Path(os.path.abspath('')).parents[2])+'/appData/misc/extracted.json'
 
     docs = []
 
@@ -125,7 +132,7 @@ def run_plsa():
     for k in fileList:
         docs.append(fileList[k])
 
-    s = TopicAnalysis(docs)
+    s = PLSA_wrapper(docs,local=True)
     s.write_to_json()
     s.generate_topics_json()
 

diff --git a/plsa/plsa/porter_dictionary.py → plsa-service/plsa/porter_dictionary_old.py b/plsa/plsa/porter_dictionary.py → plsa-service/plsa/porter_dictionary_old.py
diff --git a/plsa/plsa/setup.py → plsa-service/plsa/setup.py b/plsa/plsa/setup.py → plsa-service/plsa/setup.py
diff --git a/plsa/plsa/tester_python3.py → plsa-service/plsa/tester_python3.py b/plsa/plsa/tester_python3.py → plsa-service/plsa/tester_python3.py
diff --git a/plsa/plsa/tests.py → plsa-service/plsa/tests.py b/plsa/plsa/tests.py → plsa-service/plsa/tests.py
diff --git a/plsa/plsa/tfidf/__init__.py → plsa-service/plsa/tfidf/__init__.py b/plsa/plsa/tfidf/__init__.py → plsa-service/plsa/tfidf/__init__.py
diff --git a/plsa/plsa/tfidf/featsel.py → plsa-service/plsa/tfidf/featsel.py b/plsa/plsa/tfidf/featsel.py → plsa-service/plsa/tfidf/featsel.py
diff --git a/plsa/plsa/tfidf/porter.py → plsa-service/plsa/tfidf/porter.py b/plsa/plsa/tfidf/porter.py → plsa-service/plsa/tfidf/porter.py
diff --git a/plsa/plsa/tfidf/preprocessing.py → plsa-service/plsa/tfidf/preprocessing.py b/plsa/plsa/tfidf/preprocessing.py → plsa-service/plsa/tfidf/preprocessing.py
diff --git a/plsa/plsa/tfidf/smoothing.py → plsa-service/plsa/tfidf/smoothing.py b/plsa/plsa/tfidf/smoothing.py → plsa-service/plsa/tfidf/smoothing.py
diff --git a/plsa/plsa/tfidf/tfidf.py → plsa-service/plsa/tfidf/tfidf.py b/plsa/plsa/tfidf/tfidf.py → plsa-service/plsa/tfidf/tfidf.py
diff --git a/plsa/preprocessing/__init__.py → plsa-service/preprocessing/__init__.py b/plsa/preprocessing/__init__.py → plsa-service/preprocessing/__init__.py
diff --git a/plsa/preprocessing/cleansing.py → plsa-service/preprocessing/cleansing.py b/plsa/preprocessing/cleansing.py → plsa-service/preprocessing/cleansing.py
@@ -20,7 +20,8 @@
 import pathlib as path
 from stemming.porter2 import stem
 import string
-import preprocessing.porter_dictionary as pp
+# import preprocessing.porter_dictionary as pp
+import porter_dictionary as pp
 # import re
 
 port_dict = pp.porter_dictionary()

diff --git a/plsa/preprocessing/porter_dictionary.py → ...ervice/preprocessing/porter_dictionary.py b/plsa/preprocessing/porter_dictionary.py → ...ervice/preprocessing/porter_dictionary.py
diff --git a/servcie_spec/topic_analysis.proto b/servcie_spec/topic_analysis.proto
diff --git a/service_spec/topic_analysis.proto b/service_spec/topic_analysis.proto
@@ -0,0 +1,49 @@
+syntax = "proto3";
+
+
+
+
+message PLSARequest {
+    // Input message of the TopicAnalysis.PLSA rpc.
+    repeated string docs = 1;  // input documents, presumably one string per document -- confirm against the server
+    sint64 num_topics = 2;  // presumably the number of topics to fit; NOTE(review): sint64 is the zigzag type meant for often-negative values
+    sint64 topic_divider = 3;  // NOTE(review): meaning not visible in this file -- confirm against the server
+    sint64 maxiter = 4;  // presumably the maximum number of training iterations -- confirm
+    float beta = 5;  // NOTE(review): meaning not visible in this file -- confirm against the server
+}
+
+
+message FloatRow{
+    // One row of floats; used as the element type of PLSAResponse.topicByDocMatirx.
+    repeated float floatRow = 1;
+}
+
+message PLSAResponse{
+    // Output message of the TopicAnalysis.PLSA rpc.
+    bool status = 1;  // presumably true on success -- confirm against the server
+    string message = 2;  // free-form text accompanying status
+    repeated string docs_list = 3;
+    repeated string topics = 4;
+    repeated FloatRow topicByDocMatirx = 5;  // matrix sent as a list of rows; the "Matirx" spelling is part of the generated field name
+    repeated float topicProbabilities = 6;
+    repeated float wordByTopicConditional = 7;
+    repeated float logLikelihoods = 8;
+
+
+}
+
+
+
+
+
+///// Topic analysis Services
+
+service TopicAnalysis {
+
+    // Unary call: takes one PLSARequest and returns one PLSAResponse.
+    rpc PLSA (PLSARequest) returns (PLSAResponse) {};
+
+
+}
+
+///// End Topic analysis Services
diff --git a/snet_test_client.py b/snet_test_client.py
@@ -0,0 +1,93 @@
+# Tested on python3.6
+
+import logging
+import os
+import pathlib
+import json
+import csv
+import numpy as np
+
+import grpc
+
+from service_spec import topic_analysis_pb2, topic_analysis_pb2_grpc
+
+import subprocess
+
+
+
+
+def sample_data():
+
+    path = str(pathlib.Path(os.path.abspath('')).parents[0])+'/appData/misc/extracted.json'
+
+    docs = []
+
+    with open(path, "r") as read_file:
+        fileList = json.load(read_file)
+
+    for k in fileList:
+        docs.append(fileList[k])
+
+    return docs
+
+
+def csv_reader():
+
+    path = str(pathlib.Path(os.path.abspath('')).parents[0]) + '/appData/misc/topic-by-doc-matirx.csv'
+
+    resp = []
+
+
+    with open(path) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
+
+        docs_list = next(csv_reader)
+
+        print(docs_list[1:])
+
+        for row in csv_reader:
+            print('^^^^^^^^^^^^^^^^^^^^^^^^^^')
+            print(len(row))
+            # print(row[1:])
+            resp.append(list((np.array(row[1:])).astype(np.float)))
+
+    print('`````````````````````````````````')
+    print(resp)
+
+
+
+
+def try_plsa():
+    channel = grpc.insecure_channel('localhost:5000')
+    stub = topic_analysis_pb2_grpc.TopicAnalysisStub(channel)
+
+
+    plsa_request = topic_analysis_pb2.PLSARequest(docs=sample_data(),num_topics=3,maxiter=22,beta=1)
+
+    resp = stub.PLSA(plsa_request)
+
+
+    print(resp.status)
+    print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
+    print(resp.message)
+    print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
+    print(resp.docs_list)
+    print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
+    print(resp.topics)
+    print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
+    print(resp.topicByDocMatirx)
+    print('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^')
+
+
+
+
+
+
+
+
+if __name__ == '__main__':
+    # Running this module directly performs the gRPC round trip in try_plsa().
+    try_plsa()
+    # csv_reader()
+
+