From b01d70a9a9b1b04994775b1063902faeef521908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Cupek?= Date: Sat, 14 Nov 2020 17:58:19 +0100 Subject: [PATCH 1/3] Added translator module for better audio transcription classification. --- second-task/models/translator.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 second-task/models/translator.py diff --git a/second-task/models/translator.py b/second-task/models/translator.py new file mode 100644 index 0000000..029de91 --- /dev/null +++ b/second-task/models/translator.py @@ -0,0 +1,26 @@ +from google.cloud import translate_v2 +from google.oauth2 import service_account + + +def translate(text): + my_credentials = service_account.Credentials.from_service_account_file('apikey.json') + translate_client = translate_v2.Client(credentials=my_credentials) + result = translate_client.translate(text, target_language='en', source_language='pl') + + # print(u"Text: {}".format(result["input"])) + # print(u"Translation: {}".format(result["translatedText"])) + return result["translatedText"] + + +def translate_list_of_chunks(chunks: list) -> list: + ''' Accepts list of tuples like [(int, string), ...]''' + result_list = [] + for chunk in chunks: + translated = translate(chunk[1]) + result_list.append((chunk[0], translated)) + return result_list + + +if __name__ == '__main__': + chunks_list = [(0, "Wszedł kotek na płotek. 
Lubię placki."), (1, "inny język"), (2, 'ostatnia partia')] + translate_list_of_chunks(chunks_list) From 845d25dd6f4a9bffa2f4b80ae10b39cdbf61f3d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Cupek?= Date: Sat, 14 Nov 2020 17:58:50 +0100 Subject: [PATCH 2/3] fixed docstring --- second-task/models/translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/second-task/models/translator.py b/second-task/models/translator.py index 029de91..367f94a 100644 --- a/second-task/models/translator.py +++ b/second-task/models/translator.py @@ -13,7 +13,7 @@ def translate(text): def translate_list_of_chunks(chunks: list) -> list: - ''' Accepts list of tuples like [(int, string), ...]''' + """ Accepts list of tuples like [(int, string), ...]""" result_list = [] for chunk in chunks: translated = translate(chunk[1]) From 3924ad5150d4da6e76f97a2c5f360d28167bfb5a Mon Sep 17 00:00:00 2001 From: Kamil Zawistowski Date: Sun, 15 Nov 2020 04:56:31 +0100 Subject: [PATCH 3/3] integrate chunk audio with labels assignment and webapp --- second-task/models/speech_to_text.py | 28 ++++++++++++++++--- second-task/models/translator.py | 19 ++++++------- second-task/requirements.txt | 3 +- .../service/celery_tasks/celery_task.py | 23 ++++++++------- second-task/service/storage_manager.py | 2 +- 5 files changed, 47 insertions(+), 28 deletions(-) diff --git a/second-task/models/speech_to_text.py b/second-task/models/speech_to_text.py index c02eacb..f7bb6f6 100644 --- a/second-task/models/speech_to_text.py +++ b/second-task/models/speech_to_text.py @@ -1,8 +1,10 @@ -import json from typing import List, Tuple +import pandas as pd import speech_recognition as sr -from pydub import AudioSegment # +from pydub import AudioSegment + +from models.labels_matcher import get_similar_label, EXPECTED_LABELS def divide_chunks(sound, chunk_size): @@ -10,9 +12,10 @@ def divide_chunks(sound, chunk_size): yield sound[i:i + chunk_size] -def process(file_path: str, chunk_size: int = 5000, 
apikey=None) -> List[Tuple[int, str]]: +def process(file_path: str, chunk_size: int = 60000, apikey=None) -> Tuple[List[Tuple[int, str]], int]: sound = AudioSegment.from_mp3(file_path) + seconds = sound.duration_seconds chunks = list(divide_chunks(sound, chunk_size)) r = sr.Recognizer() @@ -24,13 +27,30 @@ def process(file_path: str, chunk_size: int = 5000, apikey=None) -> List[Tuple[i with sr.AudioFile(wav_file_path) as source: audio = r.record(source) + if apikey: s = r.recognize_google_cloud(audio, language="pl-PL", credentials_json=apikey) - results.append((index*5, s)) + words_per_second = len(s.split()) / 60 + word_counter = 1 + for word in s.split(): + results.append((int((index * 60) + word_counter / words_per_second), word)) + word_counter += 1 + print(index) else: s = r.recognize_google(audio, language="pl-PL") results.append((index*5, s)) + return results, seconds + + +def assign_label(chunks: List[Tuple[int, str]], max_second) -> pd.DataFrame: + results = pd.DataFrame(data={'second': list(range(1, int(max_second)))}) + results[EXPECTED_LABELS] = 0 + + for chunk in chunks: + labels = get_similar_label(chunk[1]) + results.loc[results['second'] == chunk[0], labels] = 1 + return results diff --git a/second-task/models/translator.py b/second-task/models/translator.py index 367f94a..1bdaeb9 100644 --- a/second-task/models/translator.py +++ b/second-task/models/translator.py @@ -1,26 +1,25 @@ +from typing import List, Tuple + from google.cloud import translate_v2 from google.oauth2 import service_account -def translate(text): - my_credentials = service_account.Credentials.from_service_account_file('apikey.json') - translate_client = translate_v2.Client(credentials=my_credentials) +def translate(text, credentials): + translate_client = translate_v2.Client(credentials=credentials) result = translate_client.translate(text, target_language='en', source_language='pl') - - # print(u"Text: {}".format(result["input"])) - # print(u"Translation: 
{}".format(result["translatedText"])) return result["translatedText"] -def translate_list_of_chunks(chunks: list) -> list: - """ Accepts list of tuples like [(int, string), ...]""" +def translate_list_of_chunks(chunks: List[Tuple[int, str]], apikey_path: str) -> List[Tuple[int, str]]: result_list = [] + credentials = service_account.Credentials.from_service_account_file(apikey_path) + for chunk in chunks: - translated = translate(chunk[1]) + translated = translate(chunk[1], credentials) result_list.append((chunk[0], translated)) return result_list if __name__ == '__main__': chunks_list = [(0, "Wszedł kotek na płotek. Lubię placki."), (1, "inny język"), (2, 'ostatnia partia')] - translate_list_of_chunks(chunks_list) + translate_list_of_chunks(chunks_list, 'apikey.json') diff --git a/second-task/requirements.txt b/second-task/requirements.txt index 081ea28..00c7115 100644 --- a/second-task/requirements.txt +++ b/second-task/requirements.txt @@ -28,4 +28,5 @@ moviepy==1.0.3 opencv-python==4.4.0.46 pandas==1.1.4 grpcio -grpcio-tools \ No newline at end of file +grpcio-tools +google-cloud-translate==3.0.1 \ No newline at end of file diff --git a/second-task/service/celery_tasks/celery_task.py b/second-task/service/celery_tasks/celery_task.py index be73608..44fda70 100644 --- a/second-task/service/celery_tasks/celery_task.py +++ b/second-task/service/celery_tasks/celery_task.py @@ -1,9 +1,8 @@ -import json -import os - from google.oauth2 import service_account from models import speech_to_text, video_to_labels +from models.speech_to_text import assign_label +from models.translator import translate_list_of_chunks from service.api import celery from service.storage_manager import Storage, update_status, JobStatus @@ -11,23 +10,23 @@ @celery.task def process_speech_to_text(job_id: str): mp3_path = Storage.get_input_mp3_path(job_id).replace('../', './') - results = speech_to_text.process(mp3_path) - with open(mp3_path.replace('.mp3', '.json'), 'w+') as file: - 
json.dump(results, file) - # TODO: call pdf creation + labels, seconds = speech_to_text.process(mp3_path) + translated_labels = translate_list_of_chunks(labels, Storage.get_apikey_path()) + results = assign_label(translated_labels, seconds) + results_path = Storage.get_results_path(job_id) + print(results_path) + print(results.head()) + results.to_csv(results_path, index=False) update_status(job_id, JobStatus.finished) @celery.task def process_video_to_labels(job_id: str): - - import os - print(os.path.dirname(os.path.realpath(__file__))) - mp4_path = Storage.get_input_mp4_path(job_id).replace('../', './') credentials_path = Storage.get_apikey_path() credentials = service_account.Credentials.from_service_account_file(credentials_path) labels = video_to_labels.process(mp4_path, credentials) - results_path = os.path.join(Storage.get_job_root(job_id).replace('../', './'), 'results.csv') + results_path = Storage.get_results_path(job_id) + print(labels.head()) labels.to_csv(results_path, index=False) update_status(job_id, JobStatus.finished) diff --git a/second-task/service/storage_manager.py b/second-task/service/storage_manager.py index 4878281..f8c6434 100644 --- a/second-task/service/storage_manager.py +++ b/second-task/service/storage_manager.py @@ -27,7 +27,7 @@ def get_job_root(job_id): @staticmethod def get_results_path(job_id): - return os.path.join('..', Storage.get_job_root(job_id), 'results.pdf') + return os.path.join(Storage.get_job_root(job_id), 'results.csv') @staticmethod def get_input_mp3_path(job_id):