diff --git a/transcribe_all.py b/transcribe_all.py index bfea85a..ef4d5db 100755 --- a/transcribe_all.py +++ b/transcribe_all.py @@ -214,21 +214,32 @@ def process_file(file_path, model, audio_input): def process_folder(root_folder): """ - Walk through root_folder and process .mp3 files, applying skip rules. - Only files that need to be transcribed (i.e. transcription does not already exist) - will have their audio pre-loaded concurrently. + Walk through root_folder and process .mp3 files. + Differentiates between “folder not found” and “folder empty.” + Select files that need to be transcribed (i.e. transcription does not already exist, applying certain rules) """ global start_time keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"] print("Create file list...") + # path actually exist / can we even try to list it? + if not os.path.exists(root_folder): + print(f"Error: Path '{root_folder}' does not exist or is not reachable.") + return + + if not os.path.isdir(root_folder): + print(f"Error: Path '{root_folder}' exists but is not a folder.") + return + + # Now we know the folder exists; let's scan it. + print(f"Scanning '{root_folder}' for .mp3 files…") valid_files = [] - checked_files = 0 - # Walk the folder and build a list of files to transcribe. + checked_files = 0 + for dirpath, _, filenames in os.walk(root_folder): for filename in filenames: if filename.lower().endswith(".mp3"): - checked_files = checked_files + 1 + checked_files += 1 filename_lower = filename.lower() file_path = os.path.join(dirpath, filename) # Skip files with skip keywords. @@ -245,11 +256,19 @@ def process_folder(root_folder): valid_files.append(file_path) - if len(valid_files) == 0: - print(f"Checked {checked_files} files. All files are transcribed.") + # If the folder was empty of .mp3s, checked_files will be 0, + # but we know it existed because we passed the exists()/isdir() tests. + if checked_files == 0: + print(f"Checked 0 files in '{root_folder}'. Folder is empty of .mp3s.") return - else: - print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.") + + # If you made it here, checked_files > 0 but maybe all were already transcribed: + if len(valid_files) == 0: + print(f"Checked {checked_files} files. All files are already transcribed.") + return + + # Otherwise you have files to process… + print(f"Checked {checked_files} files. {len(valid_files)} need transcription.") # Choose “cuda” if available, otherwise “cpu” device = "cuda" if torch.cuda.is_available() else "cpu"