diff --git a/transcribe_all.py b/transcribe_all.py
index ef4d5db..a61332e 100755
--- a/transcribe_all.py
+++ b/transcribe_all.py
@@ -139,21 +139,8 @@ def process_file(file_path, model, audio_input):
     Transcribe the audio file into one markdown file.
-    If special case (German sermon in Russian or Russian-marked file), transcribe both in Russian and German into the same file.
+    Language auto-detection is disabled: every file is transcribed in German, Russian and English into the same file.
     """
-    file_name = os.path.basename(file_path)
-
-    # Detect spoken language
-    detected = detect_language(model, audio_input)
-
-    # Determine which languages to transcribe
-    if (detected == 'ru' and 'predigt' in file_name.lower()) or \
-       (detected == 'de' and 'russisch' in file_name.lower()):
-        langs = ['de', 'ru']
-    elif detected == 'en':  # songs often mis-detected as English
-        langs = ['de']
-    elif detected in ('de', 'ru'):
-        langs = [detected]
-    else:
-        langs = ['ru']
+    # Always transcribe all three languages (the detection logic lives in git history).
+    langs = ['de', 'ru', 'en']
 
     # Collect segments for combined result
     lang_collection = {}
diff --git a/transcribe_single_file.py b/transcribe_single_file.py
index 587c25e..4ae2333 100644
--- a/transcribe_single_file.py
+++ b/transcribe_single_file.py
@@ -129,13 +129,49 @@ def process_file(file_path, model, audio_input, language=None, postfix=None):
 
 
 if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python transcribe_all.py ")
-        sys.exit(1)
-
-    file_name_path = sys.argv[1]
-
-    print("Loading Whisper model...")
-    model = whisper.load_model(model_name, device="cuda")
-    audio = whisper.load_audio(file_name_path)
-    process_file(file_name_path, model, audio, "de")
+    # Optional CLI argument: a single file (the previous behavior of this
+    # script) or a folder to scan; defaults to the "transcribe_single" folder.
+    target = sys.argv[1] if len(sys.argv) > 1 else "transcribe_single"
+
+    # Supported audio/video file types
+    supported_ext = (".mp3", ".wav", ".m4a", ".mp4", ".mov", ".flac", ".ogg")
+
+    if os.path.isfile(target):
+        files = [target]
+    elif os.path.isdir(target):
+        # os.listdir() returns names in arbitrary order and includes
+        # sub-folders: keep regular files only and sort for a stable order.
+        files = sorted(
+            os.path.join(target, f)
+            for f in os.listdir(target)
+            if f.lower().endswith(supported_ext)
+            and os.path.isfile(os.path.join(target, f))
+        )
+    else:
+        print(f"Error: '{target}' not found.")
+        sys.exit(1)
+
+    if not files:
+        print(f"No audio/video files found in '{target}'.")
+        sys.exit(1)
+
+    print(f"Found {len(files)} file(s) to transcribe:")
+    for f in files:
+        print(f" - {f}")
+
+    print("\nLoading Whisper model...")
+    model = whisper.load_model(model_name, device="cuda")  # or "cpu" if no GPU
+
+    # Process each file one by one; a failing file must not abort the batch.
+    failed = []
+    for file_path in files:
+        try:
+            audio = whisper.load_audio(file_path)
+            process_file(file_path, model, audio, "de")  # or None to auto-detect language
+        except Exception as e:
+            print(f"Error processing {file_path}: {e}")
+            failed.append(file_path)
+
+    # Signal failures through the exit status so wrapping scripts can react.
+    if failed:
+        sys.exit(1)