Compare commits

...

2 Commits

Author SHA1 Message Date
1c9fe28fc1 Merge branch 'master' of https://gitea.centx.de/lelo/bethaus-app 2025-05-29 10:19:36 +02:00
a6e77eab74 remove ffmpeg 2025-05-29 10:19:24 +02:00
2 changed files with 16 additions and 10 deletions

2
.gitignore vendored
View File

@ -16,4 +16,4 @@
/app_config.json /app_config.json
/custom_logo /custom_logo
/static/theme.css /static/theme.css
/transcription_folder.yml /transcription_config.yml

View File

@ -6,17 +6,23 @@ import concurrent.futures
import json import json
import re import re
import yaml import yaml
import librosa
import numpy as np
# model_name = "large-v3"
model_name = "medium"
# start time for transcription statistics # start time for transcription statistics
start_time = 0 start_time = 0
total_audio_length = 0 total_audio_length = 0
with open("transcription_folder.yml", "r", encoding="utf-8") as file: with open("transcription_config.yml", "r", encoding="utf-8") as file:
settings = yaml.safe_load(file) settings = yaml.safe_load(file)
folder_list = settings.get("folder_list", []) folder_list = settings.get("folder_list")
model_name = settings.get("model_name")
device = settings.get("device")
def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray:
    """Load an audio file with librosa, resampled to ``sr`` Hz.

    Args:
        path: Path of the audio file to load.
        sr: Target sampling rate in Hz, forwarded to ``librosa.load``
            (defaults to 16 kHz).

    Returns:
        The audio samples produced by ``librosa.load``.
    """
    # librosa.load returns (samples, rate). Because an explicit ``sr`` is
    # passed, the returned rate is the target rate rather than the file's
    # original one, so it carries no extra information and is discarded.
    audio, _ = librosa.load(path, sr=sr)  # load + resample to 16 kHz by default
    return audio
def format_timestamp(seconds): def format_timestamp(seconds):
"""Format seconds into HH:MM:SS.""" """Format seconds into HH:MM:SS."""
@ -217,13 +223,13 @@ def process_folder(root_folder):
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.") print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
print("Loading Whisper model...") print("Loading Whisper model...")
model = whisper.load_model(model_name, device="cuda") model = whisper.load_model(model_name, device=device)
# Use a thread pool to pre-load files concurrently. # Use a thread pool to pre-load files concurrently.
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
# Pre-load the first file. # Pre-load the first file.
print("Initialize preloading process...") print("Initialize preloading process...")
future_audio = executor.submit(whisper.load_audio, valid_files[0]) future_audio = executor.submit(load_audio_librosa, valid_files[0])
# Wait for the first file to be loaded. # Wait for the first file to be loaded.
preloaded_audio = future_audio.result() preloaded_audio = future_audio.result()
# Record start time for transcription statistics # Record start time for transcription statistics
@ -233,7 +239,7 @@ def process_folder(root_folder):
preloaded_audio = future_audio.result() preloaded_audio = future_audio.result()
# Start loading the next file concurrently. # Start loading the next file concurrently.
if i + 1 < len(valid_files): if i + 1 < len(valid_files):
future_audio = executor.submit(whisper.load_audio, valid_files[i + 1]) future_audio = executor.submit(load_audio_librosa, valid_files[i + 1])
try: # continue with next file if a file fails try: # continue with next file if a file fails
process_file(file_path, model, preloaded_audio) process_file(file_path, model, preloaded_audio)
except Exception as e: except Exception as e: