diff --git a/.gitignore b/.gitignore index e12ff74..7aaba4f 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,4 @@ /app_config.json /custom_logo /static/theme.css -/transcription_folder.yml \ No newline at end of file +/transcription_config.yml \ No newline at end of file diff --git a/transcribe_all.py b/transcribe_all.py index b2722b4..de02fa6 100755 --- a/transcribe_all.py +++ b/transcribe_all.py @@ -6,17 +6,23 @@ import concurrent.futures import json import re import yaml +import librosa +import numpy as np -# model_name = "large-v3" -model_name = "medium" # start time for transcription statistics start_time = 0 total_audio_length = 0 -with open("transcription_folder.yml", "r", encoding="utf-8") as file: +with open("transcription_config.yml", "r", encoding="utf-8") as file: settings = yaml.safe_load(file) - folder_list = settings.get("folder_list", []) + folder_list = settings.get("folder_list") + model_name = settings.get("model_name") + device = settings.get("device") + +def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray: + audio, orig_sr = librosa.load(path, sr=sr) # load + resample to 16 kHz + return audio def format_timestamp(seconds): """Format seconds into HH:MM:SS.""" @@ -209,7 +215,7 @@ def process_folder(root_folder): continue valid_files.append(file_path) - + if len(valid_files) == 0: print(f"Checked {checked_files} files. All files are transcribed.") return @@ -217,13 +223,13 @@ def process_folder(root_folder): print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.") print("Loading Whisper model...") - model = whisper.load_model(model_name, device="cuda") + model = whisper.load_model(model_name, device=device) # Use a thread pool to pre-load files concurrently. with concurrent.futures.ThreadPoolExecutor() as executor: # Pre-load the first file. print("Initialize preloading process...") - future_audio = executor.submit(whisper.load_audio, valid_files[0]) + future_audio = executor.submit(load_audio_librosa, valid_files[0]) # Wait for the first file to be loaded. preloaded_audio = future_audio.result() # Record start time for transcription statistics @@ -233,7 +239,7 @@ def process_folder(root_folder): preloaded_audio = future_audio.result() # Start loading the next file concurrently. if i + 1 < len(valid_files): - future_audio = executor.submit(whisper.load_audio, valid_files[i + 1]) + future_audio = executor.submit(load_audio_librosa, valid_files[i + 1]) try: # continue with next file if a file fails process_file(file_path, model, preloaded_audio) except Exception as e: @@ -241,6 +247,6 @@ def process_folder(root_folder): print(e) if __name__ == "__main__": - for folder in folder_list: + for folder in folder_list: process_folder(folder) print("All done!") \ No newline at end of file