2 changed files with 10 additions and 16 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,4 +16,4 @@
 /app_config.json
 /custom_logo
 /static/theme.css
-/transcription_config.yml
+/transcription_folder.yml
--- a/transcribe_all.py
+++ b/transcribe_all.py
@@ -6,23 +6,17 @@ import concurrent.futures
 import json
 import re
 import yaml
 import librosa
 import numpy as np
 # model_name = "large-v3"
 model_name = "medium"
 # start time for transcription statistics
 start_time = 0
 total_audio_length = 0
-with open("transcription_config.yml", "r", encoding="utf-8") as file:
+with open("transcription_folder.yml", "r", encoding="utf-8") as file:
    settings = yaml.safe_load(file)
-    folder_list = settings.get("folder_list")
+    folder_list = settings.get("folder_list", [])
    model_name = settings.get("model_name")
    device = settings.get("device")
 def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray:
    audio, orig_sr = librosa.load(path, sr=sr)      # load + resample to 16 kHz
    return audio
 def format_timestamp(seconds):
    """Format seconds into HH:MM:SS."""
@@ -223,13 +217,13 @@ def process_folder(root_folder):
        print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
    print("Loading Whisper model...")
-    model = whisper.load_model(model_name, device=device)
+    model = whisper.load_model(model_name, device="cuda")
    # Use a thread pool to pre-load files concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Pre-load the first file.
        print("Initialize preloading process...")
-        future_audio = executor.submit(load_audio_librosa, valid_files[0])
+        future_audio = executor.submit(whisper.load_audio, valid_files[0])
        # Wait for the first file to be loaded.
        preloaded_audio = future_audio.result()
        # Record start time for transcription statistics
@@ -239,7 +233,7 @@ def process_folder(root_folder):
            preloaded_audio = future_audio.result()
            # Start loading the next file concurrently.
            if i + 1 < len(valid_files):
-                future_audio = executor.submit(load_audio_librosa, valid_files[i + 1])
+                future_audio = executor.submit(whisper.load_audio, valid_files[i + 1])
            try: #  continue with next file if a file fails
                process_file(file_path, model, preloaded_audio)
            except Exception as e: