From 585efabfc8c51fdc6b87f572f488942f5ceea993 Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Fri, 3 May 2024 15:02:29 +0530 Subject: [PATCH 1/5] SLS Model From Scratch --- .ipynb_checkpoints/Issue 1-checkpoint.ipynb | 6 + Issue 1.ipynb | 150 ++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 .ipynb_checkpoints/Issue 1-checkpoint.ipynb create mode 100644 Issue 1.ipynb diff --git a/.ipynb_checkpoints/Issue 1-checkpoint.ipynb b/.ipynb_checkpoints/Issue 1-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Issue 1-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Issue 1.ipynb b/Issue 1.ipynb new file mode 100644 index 0000000..c5f7262 --- /dev/null +++ b/Issue 1.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "207e376a", + "metadata": {}, + "source": [ + "# Creating project from scratch " + ] + }, + { + "cell_type": "markdown", + "id": "2d94d30c", + "metadata": {}, + "source": [ + "Please find below a description of a Python script that can generate SRT (SubRip) files for videos in Tamil, Telugu, and Kannada languages. The script takes a video file and a corresponding text file as input and generates SRT files with subtitles for the given video content. Here is a step-by-step guide on how the script works and what you need to run it:\n", + "\n", + "1. Purpose: The script is designed to generate SRT files with subtitles for Tamil, Telugu, and Kannada videos. The output file will have the same name as the input video file, with the language appended at the end (for example, `example_video_Tamil.srt`).\n", + "\n", + "2. 
How it Works:\n", + " - The script first loads the video content and the text content from the provided files.\n", + " - Depending on the language chosen (Tamil, Telugu, or Kannada), it aligns and segments the text to synchronize with the video content.\n", + " - The alignment and segmentation are language-specific, with separate methods for Tamil, Telugu, and Kannada.\n", + " - Finally, it creates SRT files with the generated subtitles.\n", + "\n", + "3. What's Needed:\n", + " - Video File: You need a video file in a format that can be processed. Make sure to provide the correct path to the video file (for example, `example_video.mp4` in this script).\n", + " - Text File: You need a text file containing the transcript or dialogue of the video. This text should correspond to the language of the video. Provide the path to this file (for example, `example_text.txt` in this script).\n", + " - Dependencies: The script relies on Python's standard library and does not require any additional dependencies.\n", + "\n", + "4. Running the Script:\n", + " - Save the script into a Python file (for example, `subtitle_generator.py`).\n", + " - Place your video file and text file in the same directory as the script.\n", + " - Run the script in a Python environment.\n", + " - It will generate SRT files for each language specified (Tamil, Telugu, and Kannada), each named after the video file with the language appended (for example, `example_video_Tamil.srt`).\n", + " - Check the console output for confirmation of successful SRT generation.\n", + "\n", + "Please note that the placeholder methods (`load_video`, `align_and_segment_tamil`, `align_and_segment_telugu`, `align_and_segment_kannada`) in the script need to be replaced with actual implementations tailored to your requirements, especially for loading the video and aligning/segmenting the text based on the language." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b06471e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "class SubtitleGenerator:\n", + " def __init__(self):\n", + " self.languages = [\"Tamil\", \"Telugu\", \"Kannada\"]\n", + " \n", + " def generate_srt(self, video_file, text_file, language):\n", + " # Load video and text content\n", + " video_content = self.load_video(video_file)\n", + " text_content = self.load_text(text_file)\n", + " \n", + " # Perform alignment and segmentation based on language\n", + " if language == \"Tamil\":\n", + " subtitle_units = self.align_and_segment_tamil(video_content, text_content)\n", + " elif language == \"Telugu\":\n", + " subtitle_units = self.align_and_segment_telugu(video_content, text_content)\n", + " elif language == \"Kannada\":\n", + " subtitle_units = self.align_and_segment_kannada(video_content, text_content)\n", + " else:\n", + " print(\"Language not supported.\")\n", + " return\n", + " \n", + " # Generate SRT file\n", + " srt_content = self.create_srt(subtitle_units)\n", + " srt_filename = os.path.splitext(video_file)[0] + \"_\" + language + \".srt\"\n", + " with open(srt_filename, \"w\", encoding=\"utf-8\") as srt_file:\n", + " srt_file.write(srt_content)\n", + " \n", + " print(f\"SRT file for {language} generated successfully.\")\n", + "\n", + " def load_video(self, video_file):\n", + " # Load video file\n", + " # Placeholder for actual video loading process\n", + " print(f\"Video file '{video_file}' loaded.\")\n", + " return \"Video Content\"\n", + "\n", + " def load_text(self, text_file):\n", + " # Load text file\n", + " with open(text_file, \"r\", encoding=\"utf-8\") as file:\n", + " text_content = file.read()\n", + " print(f\"Text file '{text_file}' loaded.\")\n", + " return text_content\n", + "\n", + " def align_and_segment_tamil(self, video_content, text_content):\n", + " # Placeholder for Tamil alignment and segmentation\n", + " # Implement alignment and 
segmentation specific to Tamil\n", + " print(\"Aligning and segmenting Tamil text...\")\n", + " subtitle_units = [\"Subtitle 1\", \"Subtitle 2\", \"Subtitle 3\"] # Placeholder\n", + " return subtitle_units\n", + "\n", + " def align_and_segment_telugu(self, video_content, text_content):\n", + " \n", + " print(\"Aligning and segmenting Telugu text...\")\n", + " subtitle_units = [\"Subtitle 1\", \"Subtitle 2\", \"Subtitle 3\"] # Placeholder\n", + " return subtitle_units\n", + "\n", + " def align_and_segment_kannada(self, video_content, text_content):\n", + " \n", + " print(\"Aligning and segmenting Kannada text...\")\n", + " subtitle_units = [\"Subtitle 1\", \"Subtitle 2\", \"Subtitle 3\"] # Placeholder\n", + " return subtitle_units\n", + "\n", + " def create_srt(self, subtitle_units):\n", + " # Generate SRT\n", + " srt_content = \"\"\n", + " for i, subtitle in enumerate(subtitle_units, start=1):\n", + " srt_content += f\"{i}\\n\"\n", + " srt_content += \"00:00:00,000 --> 00:00:05,000\\n\" # time stamps\n", + " srt_content += f\"{subtitle}\\n\\n\"\n", + " return srt_content\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " generator = SubtitleGenerator()\n", + " video_file = \"example_video.mp4\" # your own\n", + " text_file = \"example_text.txt\" \n", + " for language in generator.languages:\n", + " generator.generate_srt(video_file, text_file, language)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 896742ae8c6a691be93ba6c547baa353bf924139 Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Fri, 3 May 2024 19:42:39 +0530 Subject: [PATCH 2/5] Improved Error 
Handling: --- .ipynb_checkpoints/Issue 2-checkpoint.ipynb | 6 +++ Issue 2.ipynb | 54 +++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 .ipynb_checkpoints/Issue 2-checkpoint.ipynb create mode 100644 Issue 2.ipynb diff --git a/.ipynb_checkpoints/Issue 2-checkpoint.ipynb b/.ipynb_checkpoints/Issue 2-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Issue 2-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Issue 2.ipynb b/Issue 2.ipynb new file mode 100644 index 0000000..a204c0e --- /dev/null +++ b/Issue 2.ipynb @@ -0,0 +1,54 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b11f2a2c", + "metadata": {}, + "outputs": [], + "source": [ + "# Issue 2 \n", + "# Improved Error Handling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e967732", + "metadata": {}, + "outputs": [], + "source": [ + "def get_video_duration(video_path):\n", + " try:\n", + " clip = VideoFileClip(video_path)\n", + " duration = clip.duration\n", + " clip.close()\n", + " return duration\n", + " except Exception as e:\n", + " print(f\"Error getting duration of {video_path}: {e}\")\n", + " return None\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From edc6216b386977ff3dbdb2da0e25177a00e1164f Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Fri, 3 May 2024 19:53:49 +0530 Subject: [PATCH 3/5] Dynamic Frame Rate Calculation --- .ipynb_checkpoints/Issue 3-checkpoint.ipynb | 6 ++ Issue 3.ipynb | 78 
+++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 .ipynb_checkpoints/Issue 3-checkpoint.ipynb create mode 100644 Issue 3.ipynb diff --git a/.ipynb_checkpoints/Issue 3-checkpoint.ipynb b/.ipynb_checkpoints/Issue 3-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Issue 3-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Issue 3.ipynb b/Issue 3.ipynb new file mode 100644 index 0000000..d012351 --- /dev/null +++ b/Issue 3.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "1aad2c2c", + "metadata": {}, + "outputs": [], + "source": [ + "# issue 3\n", + "#Dynamic Frame Rate Calculation" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2a158ea0", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_srt(video_path, text_path, output_path):\n", + " # Read text file\n", + " with open(text_path, 'r', encoding='utf-8') as f:\n", + " text = f.readlines()\n", + " \n", + " # Calculate subtitle timing dynamically\n", + " video_duration = get_video_duration(video_path)\n", + " num_subtitles = len(text)\n", + " if video_duration is None:\n", + " print(f\"Skipping {video_path} due to error in getting duration.\")\n", + " return\n", + " frame_rate = num_subtitles / video_duration\n", + "\n", + " # Generate SRT content\n", + " srt_content = \"\"\n", + " for i, line in enumerate(text):\n", + " start_time = format_time(i / frame_rate)\n", + " end_time = format_time((i + 1) / frame_rate)\n", + " srt_content += f\"{i+1}\\n{start_time} --> {end_time}\\n{line.strip()}\\n\\n\"\n", + " \n", + " # Write SRT file\n", + " with open(output_path, 'w', encoding='utf-8') as f:\n", + " f.write(srt_content)\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab52a98", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From fd6647dad5a89340ef6e6b53c75dbf27d5a15861 Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Fri, 3 May 2024 19:56:38 +0530 Subject: [PATCH 4/5] Explanation of Dataset and Output Folder Paths & Consistent File Name Handling: --- .ipynb_checkpoints/Untitled-checkpoint.ipynb | 68 ++++++++++++++++++++ Untitled.ipynb | 68 ++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb create mode 100644 Untitled.ipynb diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..d908ed2 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,68 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "42a39348", + "metadata": {}, + "outputs": [], + "source": [ + "#issue 4&5\n", + "#Explanation of Dataset and Output Folder Paths & Consistent File Name Handling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd7b4177", + "metadata": {}, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " dataset_path = \"path/to/dataset\" # Path to the directory containing video and text files\n", + " output_folder = \"path/to/output\" # Path to the directory where SRT files will be saved\n", + " main(dataset_path, output_folder)\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " def main(dataset_path, output_folder):\n", + " for language in os.listdir(dataset_path):\n", + " language_folder = os.path.join(dataset_path, language)\n", + " for video_name in 
os.listdir(language_folder):\n", + " if video_name.endswith('.mp4'):\n", + " video_path = os.path.join(language_folder, video_name)\n", + " text_name = video_name.replace('.mp4', '.txt')\n", + " text_path = os.path.join(language_folder, text_name)\n", + " if not os.path.isfile(text_path):\n", + " print(f\"Skipping {video_name}, corresponding text file not found.\")\n", + " continue\n", + " output_path = os.path.join(output_folder, language, video_name.replace('.mp4', '.srt'))\n", + " os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", + " generate_srt(video_path, text_path, output_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..d908ed2 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,68 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "42a39348", + "metadata": {}, + "outputs": [], + "source": [ + "#issue 4&5\n", + "#Explanation of Dataset and Output Folder Paths & Consistent File Name Handling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd7b4177", + "metadata": {}, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " dataset_path = \"path/to/dataset\" # Path to the directory containing video and text files\n", + " output_folder = \"path/to/output\" # Path to the directory where SRT files will be saved\n", + " main(dataset_path, output_folder)\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " def main(dataset_path, output_folder):\n", + " for language in os.listdir(dataset_path):\n", + " language_folder = 
os.path.join(dataset_path, language)\n", + " for video_name in os.listdir(language_folder):\n", + " if video_name.endswith('.mp4'):\n", + " video_path = os.path.join(language_folder, video_name)\n", + " text_name = video_name.replace('.mp4', '.txt')\n", + " text_path = os.path.join(language_folder, text_name)\n", + " if not os.path.isfile(text_path):\n", + " print(f\"Skipping {video_name}, corresponding text file not found.\")\n", + " continue\n", + " output_path = os.path.join(output_folder, language, video_name.replace('.mp4', '.srt'))\n", + " os.makedirs(os.path.dirname(output_path), exist_ok=True)\n", + " generate_srt(video_path, text_path, output_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 70b516addaa87c78509d9e689e9e2639d2219a9e Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Fri, 3 May 2024 19:58:33 +0530 Subject: [PATCH 5/5] Final code --- .ipynb_checkpoints/final one -checkpoint.ipynb | 6 ++++++ ...Untitled-checkpoint.ipynb => issue 4&5-checkpoint.ipynb} | 0 final one .ipynb | 6 ++++++ Untitled.ipynb => issue 4&5.ipynb | 0 4 files changed, 12 insertions(+) create mode 100644 .ipynb_checkpoints/final one -checkpoint.ipynb rename .ipynb_checkpoints/{Untitled-checkpoint.ipynb => issue 4&5-checkpoint.ipynb} (100%) create mode 100644 final one .ipynb rename Untitled.ipynb => issue 4&5.ipynb (100%) diff --git a/.ipynb_checkpoints/final one -checkpoint.ipynb b/.ipynb_checkpoints/final one -checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/final one -checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + 
"metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/issue 4&5-checkpoint.ipynb similarity index 100% rename from .ipynb_checkpoints/Untitled-checkpoint.ipynb rename to .ipynb_checkpoints/issue 4&5-checkpoint.ipynb diff --git a/final one .ipynb b/final one .ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/final one .ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/issue 4&5.ipynb similarity index 100% rename from Untitled.ipynb rename to issue 4&5.ipynb