Skip to content
This repository was archived by the owner on Feb 1, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions second-task/models/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import json
from typing import List, Tuple

import pandas as pd
import speech_recognition as sr
from pydub import AudioSegment #
from pydub import AudioSegment

from models.labels_matcher import get_similar_label, EXPECTED_LABELS


def divide_chunks(sound, chunk_size):
    """Yield consecutive slices of ``sound``, each at most ``chunk_size`` long.

    ``sound`` may be any object that supports ``len()`` and slicing. The
    slices are produced in order and the last one may be shorter than
    ``chunk_size``. An empty ``sound`` yields nothing.

    Args:
        sound: Sliceable, sized object to split.
        chunk_size: Positive step, in the same units as ``len(sound)``.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Raised when iteration
            starts, because this is a generator.
    """
    # A negative step would make range() empty and silently drop all input;
    # a zero step fails inside range() with a message unrelated to the caller.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    for start in range(0, len(sound), chunk_size):
        yield sound[start:start + chunk_size]


def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[int, str]]:
def process(file_path: str, chunk_size: int = 60000, apikey=None) -> Tuple[List[Tuple[int, str]], int]:

sound = AudioSegment.from_mp3(file_path)
seconds = sound.duration_seconds
chunks = list(divide_chunks(sound, chunk_size))

r = sr.Recognizer()
Expand All @@ -24,6 +27,7 @@ def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[

with sr.AudioFile(wav_file_path) as source:
audio = r.record(source)

if apikey:
s = r.recognize_google_cloud(audio, language="pl-PL", credentials_json=apikey)
words_per_second = len(s.split()) / 60
Expand All @@ -35,6 +39,17 @@ def process(file_path: str, chunk_size: int = 60000, apikey=None) -> List[Tuple[
s = r.recognize_google(audio, language="pl-PL")
results.append((index*5, s))

return results, seconds


def assign_label(chunks: List[Tuple[int, str]], max_second) -> pd.DataFrame:
    """Build a per-second table with one 0/1 column per expected label.

    The table has a ``second`` column running from 1 up to, but not
    including, ``int(max_second)``. Every column named in ``EXPECTED_LABELS``
    starts at 0. For each ``(second, text)`` chunk, the labels returned by
    ``get_similar_label(text)`` are set to 1 on the row whose ``second``
    equals the chunk's second.

    Args:
        chunks: ``(second, text)`` pairs.
        max_second: Upper bound of the ``second`` column; truncated with
            ``int()``.

    Returns:
        The populated DataFrame.
    """
    # NOTE(review): rows start at second 1, so a chunk stamped with second 0
    # (or with a second >= int(max_second)) matches no row and is ignored --
    # confirm this is intended.
    seconds = list(range(1, int(max_second)))
    table = pd.DataFrame(data={'second': seconds})
    table[EXPECTED_LABELS] = 0

    for second, text in chunks:
        matched_labels = get_similar_label(text)
        row_mask = table['second'] == second
        table.loc[row_mask, matched_labels] = 1

    return table


Expand Down
25 changes: 25 additions & 0 deletions second-task/models/translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import List, Tuple

from google.cloud import translate_v2
from google.oauth2 import service_account


def translate(text, credentials):
    """Translate Polish ``text`` to English via the Cloud Translation v2 client.

    A new ``translate_v2.Client`` is created from ``credentials`` on every
    call.

    Args:
        text: The Polish string to translate.
        credentials: Credentials object passed to ``translate_v2.Client``.

    Returns:
        The ``"translatedText"`` field of the client's response.
    """
    translate_client = translate_v2.Client(credentials=credentials)
    result = translate_client.translate(
        text,
        target_language='en',
        source_language='pl',
        # Ask for plain text explicitly: when no format is given the API
        # treats the input as HTML and returns entity-escaped output, which
        # would pollute any text matching done on the translation.
        format_='text',
    )
    return result["translatedText"]


def translate_list_of_chunks(chunks: List[Tuple[int, str]], apikey_path: str) -> List[Tuple[int, str]]:
    """Translate the text of every chunk, keeping each chunk's first element.

    Service-account credentials are loaded once from ``apikey_path`` and
    reused for each ``translate`` call.

    Args:
        chunks: Pairs whose second element is the text to translate.
        apikey_path: Path to the service-account key file.

    Returns:
        A new list of ``(chunk[0], translated_text)`` pairs in input order.
    """
    credentials = service_account.Credentials.from_service_account_file(apikey_path)
    return [(chunk[0], translate(chunk[1], credentials)) for chunk in chunks]


if __name__ == '__main__':
    # Manual smoke run: translate a few sample Polish chunks using the
    # service-account key expected at ./apikey.json.
    sample_chunks = [
        (0, "Wszedł kotek na płotek. Lubię placki."),
        (1, "inny język"),
        (2, 'ostatnia partia'),
    ]
    translate_list_of_chunks(sample_chunks, 'apikey.json')
3 changes: 2 additions & 1 deletion second-task/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ moviepy==1.0.3
opencv-python==4.4.0.46
pandas==1.1.4
grpcio
grpcio-tools
grpcio-tools
google-cloud-translate==3.0.1
23 changes: 11 additions & 12 deletions second-task/service/celery_tasks/celery_task.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
import json
import os

from google.oauth2 import service_account

from models import speech_to_text, video_to_labels
from models.speech_to_text import assign_label
from models.translator import translate_list_of_chunks
from service.api import celery
from service.storage_manager import Storage, update_status, JobStatus


@celery.task
def process_speech_to_text(job_id: str):
    """Transcribe a job's MP3, translate it, label it and store the result as CSV.

    Steps: run ``speech_to_text.process`` on the job's MP3, translate the
    returned chunks with ``translate_list_of_chunks``, turn them into a label
    table with ``assign_label``, write that table to the job's results path
    and mark the job as finished.

    Args:
        job_id: Identifier of the job whose input MP3 is processed.
    """
    # The storage path is rewritten from '../' to './' before use.
    mp3_path = Storage.get_input_mp3_path(job_id).replace('../', './')

    # Transcribe once; the superseded JSON dump of the raw transcription
    # (and its second, redundant transcription pass) is intentionally gone.
    labels, seconds = speech_to_text.process(mp3_path)
    translated_labels = translate_list_of_chunks(labels, Storage.get_apikey_path())
    results = assign_label(translated_labels, seconds)

    results_path = Storage.get_results_path(job_id)
    print(results_path)
    print(results.head())
    results.to_csv(results_path, index=False)
    update_status(job_id, JobStatus.finished)


@celery.task
def process_video_to_labels(job_id: str):
    """Label a job's MP4 and store the resulting table as CSV.

    Loads service-account credentials from the stored API key path, runs
    ``video_to_labels.process`` on the job's MP4, writes the returned table
    to the job's results path and marks the job as finished.

    Args:
        job_id: Identifier of the job whose input MP4 is processed.
    """
    # The storage path is rewritten from '../' to './' before use.
    mp4_path = Storage.get_input_mp4_path(job_id).replace('../', './')
    credentials_path = Storage.get_apikey_path()
    credentials = service_account.Credentials.from_service_account_file(credentials_path)
    labels = video_to_labels.process(mp4_path, credentials)

    # Single source of truth for the output location; the hand-built path
    # that used to be assigned first was immediately overwritten.
    results_path = Storage.get_results_path(job_id)
    print(labels.head())
    labels.to_csv(results_path, index=False)
    update_status(job_id, JobStatus.finished)
2 changes: 1 addition & 1 deletion second-task/service/storage_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_job_root(job_id):

@staticmethod
def get_results_path(job_id):
    """Return the path of ``results.csv`` inside the job's root directory.

    Args:
        job_id: Identifier of the job.

    Returns:
        ``Storage.get_job_root(job_id)`` joined with ``'results.csv'``.
    """
    # Only one return: the earlier 'results.pdf' return made this one
    # unreachable, although the result is written with DataFrame.to_csv.
    return os.path.join(Storage.get_job_root(job_id), 'results.csv')

@staticmethod
def get_input_mp3_path(job_id):
Expand Down