diff --git a/Auto-subtitler using openai whisper/1.mp3 b/Auto-subtitler using openai whisper/1.mp3
new file mode 100644
index 0000000..521427e
Binary files /dev/null and b/Auto-subtitler using openai whisper/1.mp3 differ
diff --git a/Auto-subtitler using openai whisper/README.md b/Auto-subtitler using openai whisper/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/Auto-subtitler using openai whisper/app.py b/Auto-subtitler using openai whisper/app.py
new file mode 100644
index 0000000..b32f101
--- /dev/null
+++ b/Auto-subtitler using openai whisper/app.py
@@ -0,0 +1,67 @@
+import moviepy.editor
+from datetime import timedelta
+import os
+import whisper
+import google.generativeai as genai
+import gradio as gr
+
+# Name of the subtitle file written by transcribe_audio().
+SRT_FILENAME = "inTamil.srt"
+
+# Step 1: extract the audio track from the video.
+video = moviepy.editor.VideoFileClip('tamil.mp4')
+try:
+    audio = video.audio
+    audio.write_audiofile('1.mp3')
+finally:
+    # Close the clip once the audio has been written (or if writing failed).
+    video.close()
+
+
+# Step 2: load OpenAI Whisper, which generates text from audio.
+model = whisper.load_model("base")
+print("Whisper model loaded.")
+
+
+def format_timestamp(seconds):
+    """Return *seconds* as an SRT timestamp (``HH:MM:SS,mmm``).
+
+    Sub-second precision is kept so that short segments do not collapse
+    into zero-length cues, and hours are zero-padded to at least two digits.
+    """
+    total_ms = max(0, int(round(seconds * 1000)))
+    hours, remainder = divmod(total_ms, 3600000)
+    minutes, remainder = divmod(remainder, 60000)
+    secs, millis = divmod(remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+
+
+def transcribe_audio(path):
+    """Transcribe the media file at *path* and save the result as SRT.
+
+    The cues are written to ``SRT_FILENAME``, replacing any previous content
+    so that re-running the script does not append duplicate cues.
+
+    Returns the name of the SRT file, even when no segments were produced.
+    """
+    segments = model.transcribe(audio=path)['segments']
+
+    entries = []
+    for segment in segments:
+        text = segment['text']
+        # Drop a single leading space, if present; also safe for empty text.
+        if text.startswith(' '):
+            text = text[1:]
+        start = format_timestamp(segment['start'])
+        end = format_timestamp(segment['end'])
+        # The cue number is the segment id plus one.
+        entries.append(f"{segment['id'] + 1}\n{start} --> {end}\n{text}\n\n")
+
+    # Open the file once and write every cue in a single pass.
+    with open(SRT_FILENAME, 'w', encoding='utf-8') as srt_file:
+        srt_file.writelines(entries)
+    return SRT_FILENAME
+
+
+# One example.
+transcribe_audio("tamil.mp4")
diff --git a/Auto-subtitler using openai whisper/inTamil.srt b/Auto-subtitler using openai whisper/inTamil.srt
new file mode 100644
index 0000000..f723918
--- /dev/null
+++ b/Auto-subtitler using openai whisper/inTamil.srt
@@ -0,0 +1,72 @@
+1
+00:00:00,000 --> 
00:00:00,000 +சாதிரனே என்னைகளுக்கு நான். + +2 +00:00:00,000 --> 00:00:02,000 +தீவால், 1PRP + +3 +00:00:02,000 --> 00:00:04,000 +தொழ்ளுக்கு அல்லவார்கால், கலைrepோழம் + +4 +00:00:04,000 --> 00:00:06,000 +அவள் ஆமாம், தொழ்ளுfirutt வா சனத்தினி தெருகினோ. + +5 +00:00:06,000 --> 00:00:08,000 +வேலபாக்லை என்ற 다 பத்தான வாங்தால், + +6 +00:00:08,000 --> 00:00:10,000 +வாங்தால்,ூக விசக்கெலுக்கு இருக்கு அவனுதுச் செலுங்கள். + +7 +00:00:11,000 --> 00:00:13,000 +உளுக்குச் செலில் என்ன வழை நனந்ததான்போடு சாதிரியா? + +8 +00:00:30,000 --> 00:00:33,000 +Soundaryアனகாளா YouTubersを la It is a profitable issue + +9 +00:00:33,000 --> 00:00:42,000 +Täisationsெல்லாம் காஞ்சடி இருக்கிறோம் + +10 +00:00:42,000 --> 00:00:44,000 +ஒரு காஸ்டவிடிருங்கள் + +11 +00:00:44,000 --> 00:00:45,000 +சaufா DARRANG + +12 +00:00:45,000 --> 00:00:46,000 +இப்போல அம்மைத்தியை வாழி செளி இப்prising பேட்டெல்று போரும + +13 +00:00:47,000 --> 00:00:48,000 +் யயோ ஒன்றுமே துெளியa + +14 +00:00:48,000 --> 00:00:49,000 +நாைத்ததாததே சா ப alarயாதே தெலியோ + +15 +00:00:50,000 --> 00:00:51,000 +ஒரு முசமாமதை வேடிம் + +16 +00:00:52,000 --> 00:00:53,000 +பேphabetியை வாழி , நிடுத்ததாயா + +17 +00:00:53,000 --> 00:00:53,000 +problemas + +18 +00:00:53,000 --> 00:00:55,000 +ஆutes G 有 + diff --git a/Auto-subtitler using openai whisper/tamil.mp4 b/Auto-subtitler using openai whisper/tamil.mp4 new file mode 100644 index 0000000..0fefee9 Binary files /dev/null and b/Auto-subtitler using openai whisper/tamil.mp4 differ