diff --git a/second-task/models/speech_to_text.py b/second-task/models/speech_to_text.py index 35a46f3..9c28782 100644 --- a/second-task/models/speech_to_text.py +++ b/second-task/models/speech_to_text.py @@ -1,8 +1,10 @@ -import json from typing import List, Tuple +import pandas as pd import speech_recognition as sr -from pydub import AudioSegment # +from pydub import AudioSegment + +from models.labels_matcher import get_similar_label, EXPECTED_LABELS def divide_chunks(sound, chunk_size): @@ -10,9 +12,10 @@ def divide_chunks(sound, chunk_size): yield sound[i:i + chunk_size] -def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[int, str]]: +def process(file_path: str, chunk_size: int = 60000, apikey=None) -> Tuple[List[Tuple[int, str]], int]: sound = AudioSegment.from_mp3(file_path) + seconds = sound.duration_seconds chunks = list(divide_chunks(sound, chunk_size)) r = sr.Recognizer() @@ -24,6 +27,7 @@ def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[ with sr.AudioFile(wav_file_path) as source: audio = r.record(source) + if apikey: s = r.recognize_google_cloud(audio, language="pl-PL", credentials_json=apikey) words_per_second = len(s.split()) / 60 @@ -35,6 +39,17 @@ def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[ s = r.recognize_google(audio, language="pl-PL") results.append((index*5, s)) + return results, seconds + + +def assign_label(chunks: List[Tuple[int, str]], max_second) -> pd.DataFrame: + results = pd.DataFrame(data={'second': list(range(1, int(max_second)))}) + results[EXPECTED_LABELS] = 0 + + for chunk in chunks: + labels = get_similar_label(chunk[1]) + results.loc[results['second'] == chunk[0], labels] = 1 + return results diff --git a/second-task/models/translator.py b/second-task/models/translator.py new file mode 100644 index 0000000..1bdaeb9 --- /dev/null +++ b/second-task/models/translator.py @@ -0,0 +1,25 @@ +from typing import List, Tuple + +from google.cloud import translate_v2 +from google.oauth2 import service_account + + +def translate(text, credentials): + translate_client = translate_v2.Client(credentials=credentials) + result = translate_client.translate(text, target_language='en', source_language='pl') + return result["translatedText"] + + +def translate_list_of_chunks(chunks: List[Tuple[int, str]], apikey_path: str) -> List[Tuple[int, str]]: + result_list = [] + credentials = service_account.Credentials.from_service_account_file(apikey_path) + + for chunk in chunks: + translated = translate(chunk[1], credentials) + result_list.append((chunk[0], translated)) + return result_list + + +if __name__ == '__main__': + chunks_list = [(0, "Wszedł kotek na płotek. Lubię placki."), (1, "inny język"), (2, 'ostatnia partia')] + translate_list_of_chunks(chunks_list, 'apikey.json') diff --git a/second-task/requirements.txt b/second-task/requirements.txt index 081ea28..00c7115 100644 --- a/second-task/requirements.txt +++ b/second-task/requirements.txt @@ -28,4 +28,5 @@ moviepy==1.0.3 opencv-python==4.4.0.46 pandas==1.1.4 grpcio -grpcio-tools \ No newline at end of file +grpcio-tools +google-cloud-translate==3.0.1 \ No newline at end of file diff --git a/second-task/service/celery_tasks/celery_task.py b/second-task/service/celery_tasks/celery_task.py index be73608..44fda70 100644 --- a/second-task/service/celery_tasks/celery_task.py +++ b/second-task/service/celery_tasks/celery_task.py @@ -1,9 +1,8 @@ -import json -import os - from google.oauth2 import service_account from models import speech_to_text, video_to_labels +from models.speech_to_text import assign_label +from models.translator import translate_list_of_chunks from service.api import celery from service.storage_manager import Storage, update_status, JobStatus @@ -11,23 +10,23 @@ @celery.task def process_speech_to_text(job_id: str): mp3_path = Storage.get_input_mp3_path(job_id).replace('../', './') - results = speech_to_text.process(mp3_path) - with open(mp3_path.replace('.mp3', '.json'), 'w+') as file: - json.dump(results, file) - # TODO: call pdf creation + labels, seconds = speech_to_text.process(mp3_path) + translated_labels = translate_list_of_chunks(labels, Storage.get_apikey_path()) + results = assign_label(translated_labels, seconds) + results_path = Storage.get_results_path(job_id) + print(results_path) + print(results.head()) + results.to_csv(results_path, index=False) update_status(job_id, JobStatus.finished) @celery.task def process_video_to_labels(job_id: str): - - import os - print(os.path.dirname(os.path.realpath(__file__))) - mp4_path = Storage.get_input_mp4_path(job_id).replace('../', './') credentials_path = Storage.get_apikey_path() credentials = service_account.Credentials.from_service_account_file(credentials_path) labels = video_to_labels.process(mp4_path, credentials) - results_path = os.path.join(Storage.get_job_root(job_id).replace('../', './'), 'results.csv') + results_path = Storage.get_results_path(job_id) + print(labels.head()) labels.to_csv(results_path, index=False) update_status(job_id, JobStatus.finished) diff --git a/second-task/service/storage_manager.py b/second-task/service/storage_manager.py index 4878281..f8c6434 100644 --- a/second-task/service/storage_manager.py +++ b/second-task/service/storage_manager.py @@ -27,7 +27,7 @@ def get_job_root(job_id): @staticmethod def get_results_path(job_id): - return os.path.join('..', Storage.get_job_root(job_id), 'results.pdf') + return os.path.join(Storage.get_job_root(job_id), 'results.csv') @staticmethod def get_input_mp3_path(job_id):