update transcription

This commit is contained in:
lelo 2026-01-24 18:45:16 +01:00
parent ffc5f67bde
commit ac9182bf16
2 changed files with 47 additions and 22 deletions

View File

@@ -139,21 +139,21 @@ def process_file(file_path, model, audio_input):
Transcribe the audio file into one markdown file. Transcribe the audio file into one markdown file.
If special case (German sermon in Russian or Russian-marked file), transcribe both in Russian and German into the same file. If special case (German sermon in Russian or Russian-marked file), transcribe both in Russian and German into the same file.
""" """
file_name = os.path.basename(file_path) # file_name = os.path.basename(file_path)
# Detect spoken language # # Detect spoken language
detected = detect_language(model, audio_input) # detected = detect_language(model, audio_input)
# Determine which languages to transcribe # # Determine which languages to transcribe
if (detected == 'ru' and 'predigt' in file_name.lower()) or \ # if (detected == 'ru' and 'predigt' in file_name.lower()) or \
(detected == 'de' and 'russisch' in file_name.lower()): # (detected == 'de' and 'russisch' in file_name.lower()):
langs = ['de', 'ru'] # langs = ['de', 'ru']
elif detected == 'en': # songs often mis-detected as English # elif detected == 'en': # songs often mis-detected as English
langs = ['de'] # langs = ['de']
elif detected in ('de', 'ru'): # elif detected in ('de', 'ru'):
langs = [detected] # langs = [detected]
else: # else:
langs = ['ru'] langs = ['de', 'ru', 'en']
# Collect segments for combined result # Collect segments for combined result
lang_collection = {} lang_collection = {}

View File

@@ -129,13 +129,38 @@ def process_file(file_path, model, audio_input, language=None, postfix=None):
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) != 2: # Folder where your audio/video files are stored
print("Usage: python transcribe_all.py <file>") input_folder = "transcribe_single"
# Check if folder exists
if not os.path.isdir(input_folder):
print(f"Error: Folder '{input_folder}' not found.")
sys.exit(1) sys.exit(1)
file_name_path = sys.argv[1] # List all supported file types
supported_ext = (".mp3", ".wav", ".m4a", ".mp4", ".mov", ".flac", ".ogg")
files = [
os.path.join(input_folder, f)
for f in os.listdir(input_folder)
if f.lower().endswith(supported_ext)
]
if not files:
print(f"No audio/video files found in '{input_folder}'.")
sys.exit(1)
print(f"Found {len(files)} file(s) in '{input_folder}':")
for f in files:
print(f" - {f}")
print("\nLoading Whisper model...")
model = whisper.load_model(model_name, device="cuda") # or "cpu" if no GPU
# Process each file one by one
for file_path in files:
try:
audio = whisper.load_audio(file_path)
process_file(file_path, model, audio, "de") # or None to auto-detect language
except Exception as e:
print(f"Error processing {file_path}: {e}")
print("Loading Whisper model...")
model = whisper.load_model(model_name, device="cuda")
audio = whisper.load_audio(file_name_path)
process_file(file_name_path, model, audio, "de")