# Issue 4 & 5
# Explanation of dataset and output folder paths & consistent file name handling
import os


def main(dataset_path, output_folder):
    """Generate one SRT file for every video/transcript pair in a dataset.

    Expected layout (one sub-folder per language)::

        dataset_path/<language>/<name>.mp4   video
        dataset_path/<language>/<name>.txt   transcript for that video
        output_folder/<language>/<name>.srt  written by this function

    Args:
        dataset_path: Directory containing one sub-directory per language.
        output_folder: Directory under which the per-language SRT files are
            written; missing directories are created.

    Raises:
        OSError: If ``dataset_path`` cannot be listed or an output directory
            cannot be created.
    """
    # sorted() keeps the processing order stable across platforms.
    for language in sorted(os.listdir(dataset_path)):
        language_folder = os.path.join(dataset_path, language)
        # Stray files at the dataset root are not language folders; listing
        # them would raise NotADirectoryError.
        if not os.path.isdir(language_folder):
            continue
        for video_name in sorted(os.listdir(language_folder)):
            # splitext only touches the final extension, unlike
            # str.replace('.mp4', ...), which rewrites every occurrence.
            stem, extension = os.path.splitext(video_name)
            if extension.lower() != '.mp4':
                continue
            video_path = os.path.join(language_folder, video_name)
            text_path = os.path.join(language_folder, stem + '.txt')
            if not os.path.isfile(text_path):
                print(f"Skipping {video_name}, corresponding text file not found.")
                continue
            output_path = os.path.join(output_folder, language, stem + '.srt')
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            # generate_srt is defined in another notebook cell (Issue 3).
            generate_srt(video_path, text_path, output_path)


# The guard comes after the definition: calling main() before its ``def``
# has executed raises NameError.
if __name__ == "__main__":
    dataset_path = "path/to/dataset"  # Path to the directory containing video and text files
    output_folder = "path/to/output"  # Path to the directory where SRT files will be saved
    main(dataset_path, output_folder)
The script takes a video file and a corresponding text file as input and generates SRT files with subtitles for the given video content. Here is a step-by-step guide on how the script works and what you need to run it:\n", + "\n", + "1. Purpose: The script is designed to generate SRT files with subtitles for Tamil, Telugu, and Kannada videos. The output file will have the same name as the input video file, with the language appended at the end (for example, `example_video_Tamil.srt`).\n", + "\n", + "2. How it Works:\n", + " - The script first loads the video content and the text content from the provided files.\n", + " - Depending on the language chosen (Tamil, Telugu, or Kannada), it aligns and segments the text to synchronize with the video content.\n", + " - The alignment and segmentation are language-specific, with separate methods for Tamil, Telugu, and Kannada.\n", + " - Finally, it creates SRT files with the generated subtitles.\n", + "\n", + "3. What's Needed:\n", + " - Video File: You need a video file in a format that can be processed. Make sure to provide the correct path to the video file (for example, `example_video.mp4` in this script).\n", + " - Text File: You need a text file containing the transcript or dialogue of the video. This text should correspond to the language of the video. Provide the path to this file (for example, `example_text.txt` in this script).\n", + " - Dependencies: The script relies on Python's standard library and does not require any additional dependencies.\n", + "\n", + "4. 
import os


class SubtitleGenerator:
    """Write placeholder SRT subtitle files for Tamil, Telugu and Kannada videos.

    ``load_video`` and the three ``align_and_segment_*`` methods are stubs
    that return fixed sample data; replace them with real implementations
    before relying on the generated subtitles.
    """

    def __init__(self):
        # Languages for which generate_srt has an alignment method.
        self.languages = ["Tamil", "Telugu", "Kannada"]

    def generate_srt(self, video_file, text_file, language):
        """Write ``<video stem>_<language>.srt`` next to ``video_file``.

        Args:
            video_file: Path of the video; passed to ``load_video`` and used
                to derive the output file name.
            text_file: Path of the UTF-8 transcript for the video.
            language: One of the names in ``self.languages``.

        Returns:
            None. An unsupported ``language`` prints a message and returns
            before any file is read or written.

        Raises:
            OSError: If the transcript cannot be read or the SRT file cannot
                be written.
        """
        aligners = {
            "Tamil": self.align_and_segment_tamil,
            "Telugu": self.align_and_segment_telugu,
            "Kannada": self.align_and_segment_kannada,
        }
        aligner = aligners.get(language)
        # Reject unknown languages first so no file is opened needlessly.
        if aligner is None:
            print("Language not supported.")
            return

        video_content = self.load_video(video_file)
        text_content = self.load_text(text_file)
        subtitle_units = aligner(video_content, text_content)

        srt_content = self.create_srt(subtitle_units)
        srt_filename = os.path.splitext(video_file)[0] + "_" + language + ".srt"
        with open(srt_filename, "w", encoding="utf-8") as srt_file:
            srt_file.write(srt_content)

        print(f"SRT file for {language} generated successfully.")

    def load_video(self, video_file):
        """Placeholder loader: report the file name and return a dummy value."""
        print(f"Video file '{video_file}' loaded.")
        return "Video Content"

    def load_text(self, text_file):
        """Return the full contents of ``text_file`` decoded as UTF-8."""
        with open(text_file, "r", encoding="utf-8") as file:
            text_content = file.read()
        print(f"Text file '{text_file}' loaded.")
        return text_content

    def align_and_segment_tamil(self, video_content, text_content):
        """Placeholder for Tamil alignment; returns three sample units."""
        print("Aligning and segmenting Tamil text...")
        return ["Subtitle 1", "Subtitle 2", "Subtitle 3"]  # Placeholder

    def align_and_segment_telugu(self, video_content, text_content):
        """Placeholder for Telugu alignment; returns three sample units."""
        print("Aligning and segmenting Telugu text...")
        return ["Subtitle 1", "Subtitle 2", "Subtitle 3"]  # Placeholder

    def align_and_segment_kannada(self, video_content, text_content):
        """Placeholder for Kannada alignment; returns three sample units."""
        print("Aligning and segmenting Kannada text...")
        return ["Subtitle 1", "Subtitle 2", "Subtitle 3"]  # Placeholder

    def create_srt(self, subtitle_units, cue_duration=5.0):
        """Render ``subtitle_units`` as SRT text with back-to-back cues.

        Args:
            subtitle_units: Iterable of subtitle strings, in display order.
            cue_duration: Seconds each cue stays on screen. Cue ``k``
                (1-based) runs from ``(k - 1) * cue_duration`` to
                ``k * cue_duration`` so that cues never overlap.

        Returns:
            The SRT document as one string; empty when there are no units.
        """
        cues = []
        for index, subtitle in enumerate(subtitle_units, start=1):
            start = self._format_timestamp((index - 1) * cue_duration)
            end = self._format_timestamp(index * cue_duration)
            cues.append(f"{index}\n{start} --> {end}\n{subtitle}\n\n")
        return "".join(cues)

    @staticmethod
    def _format_timestamp(seconds):
        """Format a non-negative number of seconds as ``HH:MM:SS,mmm``."""
        total_ms = int(round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


if __name__ == "__main__":
    generator = SubtitleGenerator()
    video_file = "example_video.mp4"  # replace with your own video
    text_file = "example_text.txt"  # replace with the matching transcript
    for language in generator.languages:
        generator.generate_srt(video_file, text_file, language)
# Issue 2
# Improved error handling:
# NOTE(review): VideoFileClip is not imported in any cell of this notebook;
# presumably it is moviepy's VideoFileClip -- confirm and import it before
# running this cell.


def get_video_duration(video_path):
    """Return the duration reported for the video at ``video_path``.

    Args:
        video_path: Path handed to ``VideoFileClip``.

    Returns:
        The clip's ``duration`` attribute (presumably seconds -- confirm
        against the VideoFileClip implementation in use), or ``None`` when
        opening the clip, reading its duration or closing it raises.
    """
    try:
        clip = VideoFileClip(video_path)
        try:
            duration = clip.duration
        finally:
            # Release the underlying reader even when .duration raises.
            clip.close()
        return duration
    except Exception as e:
        # Deliberate best-effort boundary: the caller skips videos whose
        # duration is None, so report the problem instead of aborting a batch.
        print(f"Error getting duration of {video_path}: {e}")
        return None
# Issue 3: dynamic subtitle timing
# Issue 4 & 5: explanation of dataset and output folder paths & consistent
# file name handling
import os


def generate_srt(video_path, text_path, output_path):
    """Write an SRT file that spreads the transcript evenly over the video.

    Every non-blank line of ``text_path`` becomes one cue; the cues share
    the video duration equally and follow each other without gaps.

    Args:
        video_path: Video whose duration is obtained via
            ``get_video_duration`` (defined in another notebook cell).
        text_path: UTF-8 transcript, one subtitle per line.
        output_path: Destination SRT file (written as UTF-8).

    Returns:
        None. Nothing is written when the duration is unavailable or not
        positive; a message is printed instead.

    Raises:
        OSError: If the transcript cannot be read or the SRT file cannot be
            written.
    """
    with open(text_path, 'r', encoding='utf-8') as f:
        # Blank lines (for example a trailing newline block) are not
        # subtitles; keeping them would emit cues with no text.
        subtitles = [line.strip() for line in f if line.strip()]

    video_duration = get_video_duration(video_path)
    if video_duration is None:
        print(f"Skipping {video_path} due to error in getting duration.")
        return
    if video_duration <= 0:
        # A zero duration would otherwise end in a division by zero.
        print(f"Skipping {video_path}: video duration must be positive, got {video_duration}.")
        return

    cues = []
    if subtitles:
        seconds_per_cue = video_duration / len(subtitles)
        for i, line in enumerate(subtitles):
            # format_time is expected from another notebook cell.
            # NOTE(review): it is not defined in the visible notebooks --
            # confirm it renders seconds as an SRT timestamp.
            start_time = format_time(i * seconds_per_cue)
            end_time = format_time((i + 1) * seconds_per_cue)
            cues.append(f"{i+1}\n{start_time} --> {end_time}\n{line}\n\n")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("".join(cues))


def main(dataset_path, output_folder):
    """Generate one SRT file for every video/transcript pair in a dataset.

    Expected layout (one sub-folder per language)::

        dataset_path/<language>/<name>.mp4   video
        dataset_path/<language>/<name>.txt   transcript for that video
        output_folder/<language>/<name>.srt  written by this function

    Args:
        dataset_path: Directory containing one sub-directory per language.
        output_folder: Directory under which the per-language SRT files are
            written; missing directories are created.

    Raises:
        OSError: If ``dataset_path`` cannot be listed or an output directory
            cannot be created.
    """
    # sorted() keeps the processing order stable across platforms.
    for language in sorted(os.listdir(dataset_path)):
        language_folder = os.path.join(dataset_path, language)
        # Stray files at the dataset root are not language folders; listing
        # them would raise NotADirectoryError.
        if not os.path.isdir(language_folder):
            continue
        for video_name in sorted(os.listdir(language_folder)):
            # splitext only touches the final extension, unlike
            # str.replace('.mp4', ...), which rewrites every occurrence.
            stem, extension = os.path.splitext(video_name)
            if extension.lower() != '.mp4':
                continue
            video_path = os.path.join(language_folder, video_name)
            text_path = os.path.join(language_folder, stem + '.txt')
            if not os.path.isfile(text_path):
                print(f"Skipping {video_name}, corresponding text file not found.")
                continue
            output_path = os.path.join(output_folder, language, stem + '.srt')
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            generate_srt(video_path, text_path, output_path)


# The guard comes after the definitions: calling main() before its ``def``
# has executed raises NameError.
if __name__ == "__main__":
    dataset_path = "path/to/dataset"  # Path to the directory containing video and text files
    output_folder = "path/to/output"  # Path to the directory where SRT files will be saved
    main(dataset_path, output_folder)
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}