92 lines
3.1 KiB
Python
92 lines
3.1 KiB
Python
from datetime import datetime
|
||
import re
|
||
import os
|
||
|
||
def extract_date_from_string(string_with_date):
    """Find a date inside *string_with_date* and return it as 'YYYY-MM-DD'.

    Supported notations, tried in this order:

    1. Dotted dates (first ``X.Y.Z`` digit group found in the string):
       - ``DD.MM.YYYY`` when the last group has four digits,
       - ``YYYY.MM.DD`` when the first group has four digits,
       - ``XX.XX.XX`` (all two digits): ``DD.MM.YY`` is preferred and
         ``YY.MM.DD`` is used only if the first reading is not a valid date.
    2. ISO dates with dashes (``YYYY-MM-DD``), used only when no dotted
       date could be parsed.

    Args:
        string_with_date: Text (e.g. a file name) that may contain a date.

    Returns:
        The date formatted as ``'YYYY-MM-DD'``, or ``None`` if no supported,
        valid date was found.
    """
    # grab X.Y.Z where X and Z are 1-4 digits and Y is 1-2 digits
    dotted = re.search(r'(\d{1,4}\.\d{1,2}\.\d{1,4})', string_with_date)
    if dotted:
        date_str = dotted.group(1)
        first, _middle, last = date_str.split('.')

        if len(last) == 4:
            # unambiguous: last group is the year
            candidate_formats = ('%d.%m.%Y',)
        elif len(first) == 4:
            # unambiguous: first group is the year
            candidate_formats = ('%Y.%m.%d',)
        elif len(first) == 2 and len(_middle) == 2 and len(last) == 2:
            # ambiguous: prefer DD.MM.YY, fall back to YY.MM.DD
            candidate_formats = ('%d.%m.%y', '%y.%m.%d')
        else:
            # e.g. "1.2.3" -- no supported dotted layout
            candidate_formats = ()

        for fmt in candidate_formats:
            try:
                parsed = datetime.strptime(date_str, fmt)
            except ValueError:
                continue
            return parsed.strftime('%Y-%m-%d')

    # ISO notation with dashes. The digit look-arounds keep the match from
    # starting or ending in the middle of a longer number.
    iso = re.search(r'(?<!\d)(\d{4}-\d{1,2}-\d{1,2})(?!\d)', string_with_date)
    if iso:
        try:
            parsed = datetime.strptime(iso.group(1), '%Y-%m-%d')
        except ValueError:
            return None
        return parsed.strftime('%Y-%m-%d')

    return None
|
||
|
||
|
||
def extract_structure_from_string(input_string):
    """Derive category, title and name from a dash-separated file name.

    The extension is removed with ``os.path.splitext`` (i.e. everything from
    the last dot on), then the remainder is split on ``'-'``. A leading
    segment that is purely numeric (e.g. a track number) is skipped. The
    next segment is matched case-insensitively against known keywords to
    pick the category; whatever follows is split once more into title and
    name.

    For the categories 'Predigt', 'Vorwort' and 'Gedicht' a single trailing
    segment is treated as the name (no title).

    Args:
        input_string: File name such as ``"01-Predigt-Titel-Name.mp3"``.

    Returns:
        A tuple ``(category, titel, name)``; each element is a string or
        ``None`` when it could not be determined. Segments are returned
        as-is (surrounding whitespace is not stripped).
    """
    # Order matters: the first matching entry wins (e.g. 'gruppenlied'
    # must be checked before the more general 'lied').
    category_keywords = (
        (('predig',), 'Predigt'),
        (('wort', 'einladung'), 'Vorwort'),
        (('chor',), 'Chor'),
        (('orchester',), 'Orchester'),
        (('gruppenlied', 'jugendlied'), 'Gruppenlied'),
        (('gemeinsam', 'gesang', 'lied'), 'Gemeinsamer Gesang'),
        (('gedicht',), 'Gedicht'),
        (('instrumental', 'musikstück'), 'Instrumental'),
    )

    def split_once(text):
        # Split at the first dash; the second element is None without a dash.
        if '-' in text:
            head, tail = text.split('-', 1)
            return head, tail
        return text, None

    stem = os.path.splitext(input_string)[0]
    left_side, right_side = split_once(stem)

    # Skip a leading number-only segment, but only if something follows it;
    # a file name that is just a number keeps that number as its only segment.
    try:
        int(left_side.strip())
    except ValueError:
        pass  # first part is not a number
    else:
        if right_side is not None:
            left_side, right_side = split_once(right_side)

    lowered = left_side.lower()
    category = None
    for keywords, label in category_keywords:
        if any(keyword in lowered for keyword in keywords):
            category = label
            break

    titel = None
    name = None
    if right_side:
        titel, name = split_once(right_side)
        if category in ('Predigt', 'Vorwort', 'Gedicht') and not name:
            # no title, only a name
            name = titel
            titel = None

    return category, titel, name