Compare commits

..

No commits in common. "f3b44d1d5a0c43b198ed9103a15b6f18ac0b8f66" and "6e180bfe8de884b0456f3bf68b0104319120c3f7" have entirely different histories.

View File

@ -214,32 +214,21 @@ def process_file(file_path, model, audio_input):
def process_folder(root_folder): def process_folder(root_folder):
""" """
Walk through root_folder and process .mp3 files. Walk through root_folder and process .mp3 files, applying skip rules.
Differentiates between folder not found and folder empty. Only files that need to be transcribed (i.e. transcription does not already exist)
Select files that need to be transcribed (i.e. transcription does not already exist, applying certain rules) will have their audio pre-loaded concurrently.
""" """
global start_time global start_time
keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"] keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"]
print("Create file list...") print("Create file list...")
# path actually exist / can we even try to list it?
if not os.path.exists(root_folder):
print(f"Error: Path '{root_folder}' does not exist or is not reachable.")
return
if not os.path.isdir(root_folder):
print(f"Error: Path '{root_folder}' exists but is not a folder.")
return
# Now we know the folder exists; let's scan it.
print(f"Scanning '{root_folder}' for .mp3 files…")
valid_files = [] valid_files = []
checked_files = 0 checked_files = 0
# Walk the folder and build a list of files to transcribe.
for dirpath, _, filenames in os.walk(root_folder): for dirpath, _, filenames in os.walk(root_folder):
for filename in filenames: for filename in filenames:
if filename.lower().endswith(".mp3"): if filename.lower().endswith(".mp3"):
checked_files += 1 checked_files = checked_files + 1
filename_lower = filename.lower() filename_lower = filename.lower()
file_path = os.path.join(dirpath, filename) file_path = os.path.join(dirpath, filename)
# Skip files with skip keywords. # Skip files with skip keywords.
@ -256,19 +245,11 @@ def process_folder(root_folder):
valid_files.append(file_path) valid_files.append(file_path)
# If the folder was empty of .mp3s, checked_files will be 0,
# but we know it existed because we passed the exists()/isdir() tests.
if checked_files == 0:
print(f"Checked 0 files in '{root_folder}'. Folder is empty of .mp3s.")
return
# If you made it here, checked_files > 0 but maybe all were already transcribed:
if len(valid_files) == 0: if len(valid_files) == 0:
print(f"Checked {checked_files} files. All files are already transcribed.") print(f"Checked {checked_files} files. All files are transcribed.")
return return
else:
# Otherwise you have files to process… print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
print(f"Checked {checked_files} files. {len(valid_files)} need transcription.")
# Choose “cuda” if available, otherwise “cpu” # Choose “cuda” if available, otherwise “cpu”
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"