Differentiate between folder not found and folder empty

This commit is contained in:
lelo 2025-07-24 09:46:56 +02:00
parent fde07e3830
commit d5e2331330

View File

@ -214,21 +214,32 @@ def process_file(file_path, model, audio_input):
def process_folder(root_folder):
"""
Walk through root_folder and process .mp3 files, applying skip rules.
Only files that need to be transcribed (i.e. transcription does not already exist)
will have their audio pre-loaded concurrently.
Walk through root_folder and process .mp3 files.
Differentiates between folder not found and folder empty.
Selects the files that still need to be transcribed (i.e. no transcription exists yet and no skip rule applies).
"""
global start_time
keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"]
print("Create file list...")
# Does the path actually exist / can we even try to list it?
if not os.path.exists(root_folder):
print(f"Error: Path '{root_folder}' does not exist or is not reachable.")
return
if not os.path.isdir(root_folder):
print(f"Error: Path '{root_folder}' exists but is not a folder.")
return
# Now we know the folder exists; let's scan it.
print(f"Scanning '{root_folder}' for .mp3 files…")
valid_files = []
checked_files = 0
# Walk the folder and build a list of files to transcribe.
for dirpath, _, filenames in os.walk(root_folder):
for filename in filenames:
if filename.lower().endswith(".mp3"):
checked_files = checked_files + 1
checked_files += 1
filename_lower = filename.lower()
file_path = os.path.join(dirpath, filename)
# Skip files with skip keywords.
@ -245,11 +256,19 @@ def process_folder(root_folder):
valid_files.append(file_path)
if len(valid_files) == 0:
print(f"Checked {checked_files} files. All files are transcribed.")
# If the folder was empty of .mp3s, checked_files will be 0,
# but we know it existed because we passed the exists()/isdir() tests.
if checked_files == 0:
print(f"Checked 0 files in '{root_folder}'. Folder is empty of .mp3s.")
return
else:
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
# If you made it here, checked_files > 0 but maybe all were already transcribed:
if len(valid_files) == 0:
print(f"Checked {checked_files} files. All files are already transcribed.")
return
# Otherwise you have files to process…
print(f"Checked {checked_files} files. {len(valid_files)} need transcription.")
# Choose “cuda” if available, otherwise “cpu”
device = "cuda" if torch.cuda.is_available() else "cpu"