from datetime import datetime
import re

# Dot-separated candidate "X.Y.Z": X and Z are 1-4 digits, Y is 1-2 digits.
_DOTTED_DATE_RE = re.compile(r'(\d{1,4}\.\d{1,2}\.\d{1,4})')

# ISO-style candidate "YYYY-M-D"; the lookarounds keep it from matching
# inside a longer run of digits (e.g. "12021-12-251").
_ISO_DATE_RE = re.compile(r'(?<!\d)(\d{4}-\d{1,2}-\d{1,2})(?!\d)')


def _parse_with_formats(date_str, formats):
    """Return *date_str* as 'YYYY-MM-DD' using the first matching format, else None."""
    for fmt in formats:
        try:
            parsed = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        # date.isoformat() always zero-pads the year to four digits, whereas
        # strftime('%Y') does not on every platform for years below 1000.
        return parsed.date().isoformat()
    return None


def _dotted_formats(date_str):
    """Return the strptime formats to try for a dot-separated candidate."""
    first, _, last = date_str.split('.')
    # 1) Unambiguous: the last group is a four-digit year.
    if len(last) == 4:
        return ('%d.%m.%Y',)
    # 2) Unambiguous: the first group is a four-digit year.
    if len(first) == 4:
        return ('%Y.%m.%d',)
    # 3) Ambiguous "XX.XX.XX": prefer DD.MM.YY, fall back to YY.MM.DD.
    if all(len(part) == 2 for part in date_str.split('.')):
        return ('%d.%m.%y', '%y.%m.%d')
    # Anything else (e.g. "1.2.3") is not treated as a date.
    return ()


def extract_date_from_string(string_with_date):
    """Extract the first recognisable date from a string as 'YYYY-MM-DD'.

    Supported layouts, in order of precedence:

    * dot-separated: ``DD.MM.YYYY``, ``YYYY.MM.DD`` and two-digit
      ``XX.XX.XX`` (read as ``DD.MM.YY`` first, then ``YY.MM.DD``);
    * dash-separated ISO: ``YYYY-MM-DD``, used only when no dot-separated
      candidate yields a valid date.

    Only the first dot-separated candidate in the string is considered.

    Args:
        string_with_date: Text that may contain a date. ``None`` and the
            empty string are accepted and yield ``None``.

    Returns:
        The date as a ``'YYYY-MM-DD'`` string, or ``None`` when no valid
        date is found.
    """
    if not string_with_date:
        return None

    dotted = _DOTTED_DATE_RE.search(string_with_date)
    if dotted:
        candidate = dotted.group(1)
        result = _parse_with_formats(candidate, _dotted_formats(candidate))
        if result is not None:
            return result

    # The dotted pattern can never capture a dash, so ISO dates need their
    # own pattern; without it, dash-separated input is never recognised.
    iso = _ISO_DATE_RE.search(string_with_date)
    if iso:
        return _parse_with_formats(iso.group(1), ('%Y-%m-%d',))

    return None