diff --git a/index_for_search.py b/index_for_search.py index 36149ba..3d6a010 100755 --- a/index_for_search.py +++ b/index_for_search.py @@ -101,6 +101,10 @@ def updatefileindex(): relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/') filetype = os.path.splitext(entry.name)[1].lower() + if filetype not in ['.mp3', '.wav', '.ogg', '.m4a', '.flac']: + # Skip non-audio files. + continue + # Retrieve the hit count for this file. hit_count = get_hit_count(relative_path) @@ -111,68 +115,66 @@ def updatefileindex(): site = 'Speyer' elif foldername == 'Gottesdienste Schwegenheim': site = 'Schwegenheim' - - if filetype == '.mp3': - # Check for a corresponding transcript file in a sibling "Transkription" folder. - parent_dir = os.path.dirname(entry_path) - transcript_dir = os.path.join(parent_dir, "Transkription") - transcript_filename = os.path.splitext(entry.name)[0] + ".md" - transcript_path = os.path.join(transcript_dir, transcript_filename) - if os.path.exists(transcript_path): - try: - with open(transcript_path, 'r', encoding='utf-8') as tf: - transcript = tf.read() - except Exception: - transcript = None - - # extract category and titel from filename - filename_ext = os.path.splitext(entry.name)[0] - left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None) + # Check for a corresponding transcript file in a sibling "Transkription" folder. + parent_dir = os.path.dirname(entry_path) + transcript_dir = os.path.join(parent_dir, "Transkription") + transcript_filename = os.path.splitext(entry.name)[0] + ".md" + transcript_path = os.path.join(transcript_dir, transcript_filename) + if os.path.exists(transcript_path): try: - int(left_side.strip()) - # first part is only a number - previous_right_side = right_side - left_side, right_side = previous_right_side.split('-', 1) if '-' in previous_right_side else (previous_right_side, None) - except: - # first part not a number - pass - - if 'predig' in left_side.lower(): - category = 'Predigt' - elif 'wort' in left_side.lower() or 'einladung' in left_side.lower(): - category = 'Vorwort' - elif 'chor' in left_side.lower(): - category = 'Chor' - elif 'orchester' in left_side.lower(): - category = 'Orchester' - elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower(): - category = 'Gruppenlied' - elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower(): - category = 'Gemeinsamer Gesang' - elif 'gedicht' in left_side.lower(): - category = 'Gedicht' - elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower(): - category = 'Instrumental' - else: - category = None + with open(transcript_path, 'r', encoding='utf-8') as tf: + transcript = tf.read() + except Exception: + transcript = None - if right_side: - titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None) - if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht': - if not name: # kein Titel, nur Name - name = titel - titel = None - else: - titel = None - name = None + # extract category and titel from filename + filename_ext = os.path.splitext(entry.name)[0] + left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None) + try: + int(left_side.strip()) + # first part is only a number + previous_right_side = right_side + left_side, right_side = previous_right_side.split('-', 1) if '-' in previous_right_side else (previous_right_side, None) + except: + # first part not a number + pass + + if 'predig' in left_side.lower(): + category = 'Predigt' + elif 'wort' in left_side.lower() or 'einladung' in left_side.lower(): + category = 'Vorwort' + elif 'chor' in left_side.lower(): + category = 'Chor' + elif 'orchester' in left_side.lower(): + category = 'Orchester' + elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower(): + category = 'Gruppenlied' + elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower(): + category = 'Gemeinsamer Gesang' + elif 'gedicht' in left_side.lower(): + category = 'Gedicht' + elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower(): + category = 'Instrumental' + else: + category = None + + if right_side: + titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None) + if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht': + if not name: # kein Titel, nur Name + name = titel + titel = None + else: + titel = None + name = None - # extract the date from path using regex (dd.mm.yyyy or dd.mm.yy) - date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path) + # extract the date from path using regex (supports YYYY.MM.DD, DD.MM.YYYY or DD.MM.YY) + date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4}|\d{4}\.\d{2}\.\d{2})', relative_path) if date_match: date_str = date_match.group(1) performance_date = None - for fmt in ('%d.%m.%Y', '%d.%m.%y', '%Y-%m-%d'): + for fmt in ('%Y.%m.%d', '%d.%m.%Y', '%d.%m.%y', '%Y-%m-%d'): try: date_obj = datetime.strptime(date_str, fmt) # Convert to ISO format YYYY-MM-DD