YouTube-to-Transcript/script.py

43 lines
1.5 KiB
Python
Raw Permalink Normal View History

2024-06-27 21:03:11 +00:00
from pytube import YouTube
import whisper
import os
import re
import warnings
def sanitize_filename(filename):
    """Return *filename* made safe for use as a single path component.

    Characters that are reserved on common filesystems (``\\ / * ? : " < > |``)
    and ASCII control characters are each replaced with an underscore.
    Surrounding whitespace and trailing dots/spaces are removed because
    Windows silently drops them, and because names such as ``.`` or ``..``
    would otherwise resolve to the current or parent directory.

    Args:
        filename: The raw name (for example a video title).

    Returns:
        The cleaned name, or ``'_'`` when nothing usable is left.
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', '_', filename)
    # Trim after substitution so "." / ".." / "" can never be returned.
    cleaned = cleaned.strip().rstrip('. ')
    return cleaned or '_'
def save_transcription(transcription, folder_path, file_name='transcription.txt'):
    """Write *transcription* to a UTF-8 text file and return its path.

    Args:
        transcription: The text to write.
        folder_path: Directory that should contain the file. It is created
            (including parents) when it does not exist yet.
        file_name: Name of the file inside *folder_path*.

    Returns:
        The path of the written file (``folder_path`` joined with
        ``file_name``).
    """
    file_path = os.path.join(folder_path, file_name)
    parent_dir = os.path.dirname(file_path)
    # os.makedirs('') raises, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(transcription)
    return file_path
def download_youtube_audio(url, folder_path):
    """Download the first audio-only stream of a YouTube video.

    A sub-folder named after the sanitized video title is created inside
    *folder_path*, and the audio is stored there as ``audio.mp3``.

    NOTE(review): the file is always named ``audio.mp3`` regardless of the
    container of the selected stream; confirm downstream tools detect the
    format from content rather than from the extension.

    Args:
        url: URL of the YouTube video.
        folder_path: Base directory under which the per-video folder is
            created.

    Returns:
        A ``(audio_path, folder_path)`` tuple: the path of the downloaded
        audio file and the per-video folder that contains it.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    video_title = sanitize_filename(yt.title)
    folder_path = os.path.join(folder_path, video_title)
    os.makedirs(folder_path, exist_ok=True)

    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        # .first() yields None for an empty query; fail with a clear message
        # instead of an AttributeError on None.
        raise RuntimeError(f"No audio-only stream available for: {url}")

    audio_name = 'audio.mp3'
    # Pass directory and file name separately rather than a full path as
    # `filename`, which is what the download API expects.
    audio_stream.download(output_path=folder_path, filename=audio_name)
    return os.path.join(folder_path, audio_name), folder_path
def transcribe_audio(audio_path, model_name='base'):
    """Transcribe an audio file with Whisper and return the recognized text.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_name: Name of the Whisper model passed to
            ``whisper.load_model``.

    Returns:
        The value stored under the ``'text'`` key of the transcription
        result.
    """
    # Silence the FP16 notice before the model runs; the filter is installed
    # process-wide and stays active after this call.
    warnings.filterwarnings(
        "ignore",
        message="FP16 is not supported on CPU; using FP32 instead",
    )
    return whisper.load_model(model_name).transcribe(audio_path)['text']
def main():
    """Prompt for a YouTube URL, then download, transcribe, save and print."""
    # Pasted URLs often carry stray surrounding whitespace.
    video_url = input("Enter the YouTube video URL: ").strip()
    folder_path = 'Downloaded_Audios'

    audio_path, folder_path = download_youtube_audio(video_url, folder_path)
    transcription = transcribe_audio(audio_path)
    transcription_file_path = save_transcription(transcription, folder_path)

    print(f"Transcription saved to: {transcription_file_path}")
    print("\nTranscription:\n")
    print(transcription)


# Run the interactive workflow only when executed as a script, so importing
# this module does not prompt for input or start a download.
if __name__ == "__main__":
    main()