Compare commits
2 Commits
5b4e7188cd
...
1c9fe28fc1
| Author | SHA1 | Date | |
|---|---|---|---|
| 1c9fe28fc1 | |||
| a6e77eab74 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -16,4 +16,4 @@
|
||||
/app_config.json
|
||||
/custom_logo
|
||||
/static/theme.css
|
||||
/transcription_folder.yml
|
||||
/transcription_config.yml
|
||||
@@ -6,17 +6,23 @@ import concurrent.futures
|
||||
import json
|
||||
import re
|
||||
import yaml
|
||||
import librosa
|
||||
import numpy as np
|
||||
|
||||
# model_name = "large-v3"
|
||||
model_name = "medium"
|
||||
|
||||
# start time for transcription statistics
|
||||
start_time = 0
|
||||
total_audio_length = 0
|
||||
|
||||
with open("transcription_folder.yml", "r", encoding="utf-8") as file:
|
||||
with open("transcription_config.yml", "r", encoding="utf-8") as file:
|
||||
settings = yaml.safe_load(file)
|
||||
folder_list = settings.get("folder_list", [])
|
||||
folder_list = settings.get("folder_list")
|
||||
model_name = settings.get("model_name")
|
||||
device = settings.get("device")
|
||||
|
||||
def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray:
|
||||
audio, orig_sr = librosa.load(path, sr=sr) # load + resample to 16 kHz
|
||||
return audio
|
||||
|
||||
def format_timestamp(seconds):
|
||||
"""Format seconds into HH:MM:SS."""
|
||||
@@ -217,13 +223,13 @@ def process_folder(root_folder):
|
||||
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
|
||||
|
||||
print("Loading Whisper model...")
|
||||
model = whisper.load_model(model_name, device="cuda")
|
||||
model = whisper.load_model(model_name, device=device)
|
||||
|
||||
# Use a thread pool to pre-load files concurrently.
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
# Pre-load the first file.
|
||||
print("Initialize preloading process...")
|
||||
future_audio = executor.submit(whisper.load_audio, valid_files[0])
|
||||
future_audio = executor.submit(load_audio_librosa, valid_files[0])
|
||||
# Wait for the first file to be loaded.
|
||||
preloaded_audio = future_audio.result()
|
||||
# Record start time for transcription statistics
|
||||
@@ -233,7 +239,7 @@ def process_folder(root_folder):
|
||||
preloaded_audio = future_audio.result()
|
||||
# Start loading the next file concurrently.
|
||||
if i + 1 < len(valid_files):
|
||||
future_audio = executor.submit(whisper.load_audio, valid_files[i + 1])
|
||||
future_audio = executor.submit(load_audio_librosa, valid_files[i + 1])
|
||||
try: # continue with next file if a file fails
|
||||
process_file(file_path, model, preloaded_audio)
|
||||
except Exception as e:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user