diff --git a/test_topic_analysis_grpc.py b/test_topic_analysis_grpc.py index 46c3913..0a11351 100644 --- a/test_topic_analysis_grpc.py +++ b/test_topic_analysis_grpc.py @@ -22,6 +22,7 @@ def setUp(self): self.app = analysis_results.app.test_client() self.docs = [] + self.docs_2 = None sample_doc = 'docs/tests/test_doc.txt' with open(sample_doc,'r') as f: @@ -29,6 +30,9 @@ def setUp(self): self.docs = list(filter(lambda a: a != '', self.docs)) + with open(sample_doc,'r') as f: + self.docs_2 = [f.read()] + channel = grpc.insecure_channel('localhost:5000') self.stub = topic_analysis_pb2_grpc.TopicAnalysisStub(channel) @@ -78,8 +82,42 @@ def test_response_format_grpc(self): + # Test for untokenized text input + + plsa_request = topic_analysis_pb2.PLSARequest(docs=self.docs_2, num_topics=2, maxiter=22, beta=1) + + resp = self.stub.PLSA(plsa_request) + + print('////////////// Sleeping till topic analysis finishes') + time.sleep(sleep_time_secs) + print('\\\\\\\\\\\\\\\\\\\\\\\\\\\\ Wide awake now') + + print(resp) + + self.assertEqual([resp.status,resp.message],[True,'success']) + resp2 = self.app.get('/topic-analysis/api/v1.0/results?handle='+resp.handle) + resp2_data = json.loads(resp2.get_data(as_text=True)) + print(';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;') + self.assertEqual(resp2_data['status'],'Topic analysis finished.') + self.assertGreater(resp2_data['total running time in minutes'],0.0) + self.assertEqual(resp2_data['docs_list'], [str(i) for i in range(0,98)]) + self.assertEqual(len(resp2_data['topics']),2) + self.assertIsInstance(resp2_data['topics'][0],str) + self.assertIsInstance(resp2_data['topics'][1],str) + self.assertEqual(len(resp2_data['topicByDocMatirx']),2) + self.assertEqual(len(resp2_data['topicByDocMatirx'][0]),98) + self.assertAlmostEqual(sum(sum(resp2_data['topicByDocMatirx'],[])),1.0,delta=0.1) + print('sum of p(z,d)=',sum(sum(resp2_data['topicByDocMatirx'],[]))) + self.assertAlmostEqual(resp2_data['topicProbabilities'][0]+ 
resp2_data['topicProbabilities'][1],1.0,delta=0.1) + self.assertEqual(len(resp2_data['wordByTopicConditional']), 2) + self.assertEqual(len(resp2_data['wordByTopicConditional'][0]), 300) + self.assertAlmostEqual(sum(sum(resp2_data['wordByTopicConditional'], [])), 1.0, delta=0.1) + print('sum of p(w|z)=',sum(sum(resp2_data['wordByTopicConditional'],[]))) + self.assertEqual(len(resp2_data['logLikelihoods']),23) + for i in range(0,23): + self.assertLess(resp2_data['logLikelihoods'][i],0) diff --git a/topic_analysis_grpc.py b/topic_analysis_grpc.py index 48c68db..fabff29 100644 --- a/topic_analysis_grpc.py +++ b/topic_analysis_grpc.py @@ -11,6 +11,7 @@ import numpy as np import datetime import random +from nltk.tokenize import sent_tokenize SLEEP_TIME = 86400 # One day @@ -49,10 +50,13 @@ def PLSA(self,request,context): try : - if len(docs) < 2: - message = 'Length of docs should be at least two' + if len(docs) < 1: + message = 'Length of docs should be at least one' param_error =True + if len(docs) == 1: + docs = sent_tokenize(docs[0]) + if topic_divider < 0: param_error = True message = 'topic_divider parameter can not be a negative nubmer'