Compare commits
No commits in common. "1c9fe28fc114df7001147e1835077146d5e5a07a" and "5b4e7188cd68be255dec5995f3c15a279f4c778a" have entirely different histories.
1c9fe28fc1
...
5b4e7188cd
2
.gitignore
vendored
2
.gitignore
vendored
@ -16,4 +16,4 @@
|
|||||||
/app_config.json
|
/app_config.json
|
||||||
/custom_logo
|
/custom_logo
|
||||||
/static/theme.css
|
/static/theme.css
|
||||||
/transcription_config.yml
|
/transcription_folder.yml
|
||||||
@ -6,23 +6,17 @@ import concurrent.futures
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import yaml
|
import yaml
|
||||||
import librosa
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
# model_name = "large-v3"
|
||||||
|
model_name = "medium"
|
||||||
|
|
||||||
# start time for transcription statistics
|
# start time for transcription statistics
|
||||||
start_time = 0
|
start_time = 0
|
||||||
total_audio_length = 0
|
total_audio_length = 0
|
||||||
|
|
||||||
with open("transcription_config.yml", "r", encoding="utf-8") as file:
|
with open("transcription_folder.yml", "r", encoding="utf-8") as file:
|
||||||
settings = yaml.safe_load(file)
|
settings = yaml.safe_load(file)
|
||||||
folder_list = settings.get("folder_list")
|
folder_list = settings.get("folder_list", [])
|
||||||
model_name = settings.get("model_name")
|
|
||||||
device = settings.get("device")
|
|
||||||
|
|
||||||
def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray:
|
|
||||||
audio, orig_sr = librosa.load(path, sr=sr) # load + resample to 16 kHz
|
|
||||||
return audio
|
|
||||||
|
|
||||||
def format_timestamp(seconds):
|
def format_timestamp(seconds):
|
||||||
"""Format seconds into HH:MM:SS."""
|
"""Format seconds into HH:MM:SS."""
|
||||||
@ -223,13 +217,13 @@ def process_folder(root_folder):
|
|||||||
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
|
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
|
||||||
|
|
||||||
print("Loading Whisper model...")
|
print("Loading Whisper model...")
|
||||||
model = whisper.load_model(model_name, device=device)
|
model = whisper.load_model(model_name, device="cuda")
|
||||||
|
|
||||||
# Use a thread pool to pre-load files concurrently.
|
# Use a thread pool to pre-load files concurrently.
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
# Pre-load the first file.
|
# Pre-load the first file.
|
||||||
print("Initialize preloading process...")
|
print("Initialize preloading process...")
|
||||||
future_audio = executor.submit(load_audio_librosa, valid_files[0])
|
future_audio = executor.submit(whisper.load_audio, valid_files[0])
|
||||||
# Wait for the first file to be loaded.
|
# Wait for the first file to be loaded.
|
||||||
preloaded_audio = future_audio.result()
|
preloaded_audio = future_audio.result()
|
||||||
# Record start time for transcription statistics
|
# Record start time for transcription statistics
|
||||||
@ -239,7 +233,7 @@ def process_folder(root_folder):
|
|||||||
preloaded_audio = future_audio.result()
|
preloaded_audio = future_audio.result()
|
||||||
# Start loading the next file concurrently.
|
# Start loading the next file concurrently.
|
||||||
if i + 1 < len(valid_files):
|
if i + 1 < len(valid_files):
|
||||||
future_audio = executor.submit(load_audio_librosa, valid_files[i + 1])
|
future_audio = executor.submit(whisper.load_audio, valid_files[i + 1])
|
||||||
try: # continue with next file if a file fails
|
try: # continue with next file if a file fails
|
||||||
process_file(file_path, model, preloaded_audio)
|
process_file(file_path, model, preloaded_audio)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user