from datetime import datetime
import re
import os
from typing import Optional, Sequence, Tuple

# Dotted date candidate "X.Y.Z": X and Z are 1-4 digits, Y is 1-2 digits.
_DOTTED_DATE_RE = re.compile(r'(\d{1,4}\.\d{1,2}\.\d{1,4})')

# ISO-style candidate "YYYY-M-D"; the look-arounds keep it from matching
# inside a longer run of digits.
_ISO_DATE_RE = re.compile(r'(?<!\d)(\d{4}-\d{1,2}-\d{1,2})(?!\d)')

# Ordered (keywords, category) table. Order matters: the first entry with a
# keyword contained in the lower-cased text wins, so e.g. 'gruppenlied' must
# be tested before the more general 'lied'.
_CATEGORY_KEYWORDS = (
    (('predig',), 'Predigt'),
    (('wort', 'einladung'), 'Vorwort'),
    (('chor',), 'Chor'),
    (('orchester',), 'Orchester'),
    (('gruppenlied', 'jugendlied'), 'Gruppenlied'),
    (('gemeinsam', 'gesang', 'lied'), 'Gemeinsamer Gesang'),
    (('gedicht',), 'Gedicht'),
    (('instrumental', 'musikstück'), 'Instrumental'),
)

# Categories for which a lone trailing segment is treated as a name, not a title.
_NAME_ONLY_CATEGORIES = frozenset({'Predigt', 'Vorwort', 'Gedicht'})


def _parse_with_formats(date_str: str, formats: Sequence[str]) -> Optional[str]:
    """Return *date_str* as ``YYYY-MM-DD`` using the first format that parses it."""
    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt).strftime('%Y-%m-%d')
        except ValueError:
            continue
    return None


def _dotted_date_formats(date_str: str) -> Tuple[str, ...]:
    """Return the strptime formats to try, in order, for a dotted ``X.Y.Z`` string."""
    first, middle, last = date_str.split('.')
    if len(last) == 4:
        # Unambiguous: the last group is a four-digit year.
        return ('%d.%m.%Y',)
    if len(first) == 4:
        # Unambiguous: the first group is a four-digit year.
        return ('%Y.%m.%d',)
    if len(first) == len(middle) == len(last) == 2:
        # Ambiguous "XX.XX.XX": prefer DD.MM.YY, fall back to YY.MM.DD.
        return ('%d.%m.%y', '%y.%m.%d')
    return ()


def _split_once(text: str) -> Tuple[str, Optional[str]]:
    """Split *text* at its first '-'; the second item is None when there is no '-'."""
    head, sep, tail = text.partition('-')
    return (head, tail) if sep else (text, None)


def extract_date_from_string(string_with_date: str) -> Optional[str]:
    """Extract the first recognizable date from a string as ``YYYY-MM-DD``.

    Dotted dates are tried first, using the first ``X.Y.Z`` match in the
    string:

    * ``DD.MM.YYYY`` when the last group has four digits,
    * ``YYYY.MM.DD`` when the first group has four digits,
    * ``XX.XX.XX`` is read as ``DD.MM.YY`` and, if that is not a valid
      date, as ``YY.MM.DD``.

    If no dotted date can be parsed, the first ISO-style ``YYYY-M-D``
    occurrence (with dashes) is tried.

    Args:
        string_with_date: Text that may contain a date.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when no valid
        date is found.
    """
    dotted = _DOTTED_DATE_RE.search(string_with_date)
    if dotted:
        date_str = dotted.group(1)
        result = _parse_with_formats(date_str, _dotted_date_formats(date_str))
        if result is not None:
            return result

    # The dotted pattern can never contain '-', so ISO dates need their own
    # search; otherwise this case would be unreachable.
    iso = _ISO_DATE_RE.search(string_with_date)
    if iso:
        return _parse_with_formats(iso.group(1), ('%Y-%m-%d',))
    return None


def extract_structure_from_string(
    input_string: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Derive category, title and name from a '-'-separated filename.

    The file extension is removed, then the remainder is read as
    ``[number-]category[-title[-name]]``. A leading segment that parses as
    an integer is skipped. The category is chosen by keyword lookup in the
    category segment (case-insensitive).

    Args:
        input_string: Filename, with or without extension.

    Returns:
        A tuple ``(category, title, name)``. ``category`` is ``None`` when
        no keyword matches. For the categories 'Predigt', 'Vorwort' and
        'Gedicht' a single trailing segment is returned as ``name`` with
        ``title`` set to ``None``. Segments are returned as found, without
        stripping surrounding whitespace.
    """
    stem = os.path.splitext(input_string)[0]
    left_side, right_side = _split_once(stem)

    # Skip a leading numeric segment. When nothing follows it
    # (right_side is None) there is nothing to shift, so leave it alone.
    if right_side is not None:
        try:
            int(left_side.strip())
        except ValueError:
            pass  # first segment is not a number
        else:
            left_side, right_side = _split_once(right_side)

    lowered = left_side.lower()
    category = None
    for keywords, label in _CATEGORY_KEYWORDS:
        if any(keyword in lowered for keyword in keywords):
            category = label
            break

    if not right_side:
        return category, None, None

    title, name = _split_once(right_side)
    if category in _NAME_ONLY_CATEGORIES and not name:
        # No title, only a name.
        title, name = None, title
    return category, title, name