Merge branch 'master' of https://gitea.centx.de/lelo/bethaus-app

remove ffmpeg
2025-05-29 10:19:36 +02:00 · 2025-05-29 10:19:24 +02:00
2 changed files with 16 additions and 10 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,4 +16,4 @@
 /app_config.json
 /custom_logo
 /static/theme.css
-/transcription_folder.yml
+/transcription_config.yml
--- a/transcribe_all.py
+++ b/transcribe_all.py
@@ -6,17 +6,23 @@ import concurrent.futures
 import json
 import re
 import yaml
+import librosa
+import numpy as np

-# model_name = "large-v3"
-model_name = "medium"

 # start time for transcription statistics
 start_time = 0
 total_audio_length = 0

-with open("transcription_folder.yml", "r", encoding="utf-8") as file:
+with open("transcription_config.yml", "r", encoding="utf-8") as file:
    settings = yaml.safe_load(file)
-    folder_list = settings.get("folder_list", [])
+    folder_list = settings.get("folder_list")
+    model_name = settings.get("model_name")
+    device = settings.get("device")
+
+def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray:
+    audio, orig_sr = librosa.load(path, sr=sr)      # load + resample to 16 kHz
+    return audio

 def format_timestamp(seconds):
    """Format seconds into HH:MM:SS."""
@@ -217,13 +223,13 @@ def process_folder(root_folder):
        print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
    
    print("Loading Whisper model...")
-    model = whisper.load_model(model_name, device="cuda")
+    model = whisper.load_model(model_name, device=device)
    
    # Use a thread pool to pre-load files concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Pre-load the first file.
        print("Initialize preloading process...")
-        future_audio = executor.submit(whisper.load_audio, valid_files[0])
+        future_audio = executor.submit(load_audio_librosa, valid_files[0])
        # Wait for the first file to be loaded.
        preloaded_audio = future_audio.result()
        # Record start time for transcription statistics
@@ -233,7 +239,7 @@ def process_folder(root_folder):
            preloaded_audio = future_audio.result()
            # Start loading the next file concurrently.
            if i + 1 < len(valid_files):
-                future_audio = executor.submit(whisper.load_audio, valid_files[i + 1])
+                future_audio = executor.submit(load_audio_librosa, valid_files[i + 1])
            try: #  continue with next file if a file fails
                process_file(file_path, model, preloaded_audio)
            except Exception as e:
Author	SHA1	Message	Date
lelo	1c9fe28fc1	Merge branch 'master' of https://gitea.centx.de/lelo/bethaus-app	2025-05-29 10:19:36 +02:00
lelo	a6e77eab74	remove ffmpeg	2025-05-29 10:19:24 +02:00