bethaus-app/helperfunctions.py

181 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from flask import session
import re
import os
import sqlite3
from datetime import datetime, timedelta
from typing import Optional
import auth
# Application configuration, as returned by the project-local ``auth`` module.
app_config = auth.return_app_config()
# Configured BASE_DIR in canonical form (``realpath`` resolves symlinks and
# relative components); used below to check that logged files exist on disk.
BASE_DIR = os.path.realpath(app_config['BASE_DIR'])
# Module-wide SQLite connection to the access log, opened at import time.
# The path is relative, so it resolves against the process's working directory.
# ``check_same_thread=False`` lets threads other than the creating one use the
# connection.
# NOTE(review): no locking is visible in this file — confirm that concurrent
# use of this shared connection is serialized elsewhere.
log_db = sqlite3.connect("access_log.db", check_same_thread=False)
# Precompiled regex to find date-like patterns: either dotted X.Y.Z or ISO dashed YYYY-MM-DD.
# The pattern only checks digit counts and separators; whether a match is a real
# calendar date is decided later by parsing it. There are no word-boundary
# anchors, so a match may start or end inside a longer run of digits.
_DATE_REGEX = re.compile(
    r"("  # start group
    r"\d{4}-\d{1,2}-\d{1,2}"  # ISO: YYYY-M-D or YYYY-MM-DD
    r"|"  # or
    r"\d{1,4}\.\d{1,2}\.\d{1,4}"  # dotted: X.Y.Z, where X and Z are 1-4 digits and Y is 1-2 digits
    r")"
)
def _try_parse(date_str: str, fmt: str) -> Optional[datetime]:
    """Parse ``date_str`` with the ``strptime`` format ``fmt``.

    Returns the parsed ``datetime``, or ``None`` when ``strptime`` rejects
    the input with ``ValueError``.
    """
    try:
        parsed = datetime.strptime(date_str, fmt)
    except ValueError:
        # Not a valid date for this format; callers treat None as "no match".
        parsed = None
    return parsed
def _normalize_date_candidate(date_str: str) -> Optional[str]:
    """Return ``date_str`` as ``YYYY-MM-DD``, or ``None`` if it is not a valid date."""
    if '-' in date_str:
        # ISO dashed format has exactly one interpretation.
        formats = ['%Y-%m-%d']
    else:
        parts = date_str.split('.')
        formats = []
        if len(parts) == 3:
            # Unambiguous: last part has 4 digits -> DD.MM.YYYY
            if len(parts[2]) == 4:
                formats.append('%d.%m.%Y')
            # Unambiguous: first part has 4 digits -> YYYY.MM.DD
            if len(parts[0]) == 4:
                formats.append('%Y.%m.%d')
            # Ambiguous two-digit groups: prefer DD.MM.YY, then fall back to YY.MM.DD
            if all(len(p) == 2 for p in parts):
                formats.extend(['%d.%m.%y', '%y.%m.%d'])

    # The first format that yields a real calendar date wins.
    for fmt in formats:
        parsed = _try_parse(date_str, fmt)
        if parsed is not None:
            return parsed.strftime('%Y-%m-%d')
    return None


def extract_date_from_string(text: str) -> Optional[str]:
    """
    Extract the first valid date from text and return it in ISO format (YYYY-MM-DD).

    Supports:
    - ISO-style dates with dashes (YYYY-M-D or YYYY-MM-DD)
    - Dotted dates (DD.MM.YYYY, YYYY.MM.DD, DD.MM.YY, YY.MM.DD)

    Date-like substrings are examined from left to right. A substring that
    merely looks like a date (for example a version number such as ``1.2.3``
    or an impossible date such as ``2023-13-45``) is skipped, so it no longer
    hides a valid date that appears later in the text.

    Returns None when text contains no parseable date.
    """
    for match in _DATE_REGEX.finditer(text):
        iso_date = _normalize_date_candidate(match.group(1))
        if iso_date is not None:
            return iso_date
    # no valid parse
    return None
# Mapping: category -> lowercase trigger substrings.
# Dict order is the match priority, so specific entries (e.g. 'Kinderchor')
# must stay ahead of the generic ones they contain (e.g. 'Chor').
_CATEGORY_KEYWORDS = {
    'Predigt': ['predig', 'thema'],
    'Vorwort': ['wort', 'einladung', 'begrüßung', 'ansprache', 'einleitung', 'aufruf zum', 'zuruf zum'],
    'Kinderchor': ['kinderchor'],
    'Jugendchor': ['jugendchor'],
    'Orchester': ['orchester', 'sinfonie', 'symphonie'],
    'Chor': ['chor'],
    'Gemeinsamer Gesang': ['gemeinsam', 'gemeindelied', 'gemeinsamer gesang'],
    'Gruppenlied': ['gruppenlied', 'jugend', 'lied', 'musikgruppe'],
    'Gedicht': ['gedicht'],
    'Erzählung': ['vortrag', 'erzä', 'program'],
    'Instrumental': ['instrumental', 'musikstück', 'harfenstück'],
}

# Categories where a single trailing field is treated as the name, not the title.
_NAME_ONLY_CATEGORIES = ('Predigt', 'Vorwort', 'Gedicht')


def _split_at_first_dash(value):
    """Split value at its first '-'; the tail is None when there is no dash."""
    head, dash, tail = value.partition('-')
    return (head, tail) if dash else (value, None)


def extract_structure_from_string(input_string):
    """
    Derive (category, titel, name) from a file name or path.

    The extension and any directory part are removed, then the remaining name
    is read as dash-separated fields: ``[number-]category[-titel[-name]]``.
    A leading field that is purely numeric is skipped.

    - category: first key of the keyword table whose trigger word occurs in
      the lower-cased category field, or None if nothing matches.
    - titel / name: the fields after the category field. They are returned as
      found (not stripped); both are None when there is nothing after the
      category field. For 'Predigt', 'Vorwort' and 'Gedicht' a single trailing
      field is returned as name, with titel set to None.

    Returns a 3-tuple (category, titel, name).
    """
    stem = os.path.splitext(input_string)[0]  # remove file extension
    stem = os.path.basename(stem)  # keep only the file name
    left_side, right_side = _split_at_first_dash(stem)

    try:
        int(left_side.strip())
    except ValueError:
        # first part is not a number: it is already the category field
        pass
    else:
        # First part is only a number: the category field is the next one.
        # With nothing after the number, keep the fields unchanged.
        if right_side is not None:
            left_side, right_side = _split_at_first_dash(right_side)

    # Walk the table in priority order and take the first category that matches.
    text = left_side.lower()
    category = next(
        (
            cat
            for cat, keywords in _CATEGORY_KEYWORDS.items()
            if any(kw in text for kw in keywords)
        ),
        None,
    )

    if not right_side:
        return category, None, None

    titel, name = _split_at_first_dash(right_side)
    if category in _NAME_ONLY_CATEGORIES and not name:
        # no title, only a name
        titel, name = None, titel
    return category, titel, name
def generate_top_list(category):
    """
    Build the list of the most-accessed audio files of one category.

    Access counts come from the ``file_access_log`` table, restricted to
    entries with an ``audio/`` mime type and a timestamp within the last
    14 days. A file is included only if
    - its rel_path starts with one of the keys of ``session['folders']``,
    - ``extract_structure_from_string`` assigns it the requested category, and
    - it still exists below BASE_DIR.

    Args:
        category: category name to match, compared against the first element
            returned by ``extract_structure_from_string``.

    Returns:
        Up to 20 dicts with the keys 'name', 'path' and 'file_type', ordered
        by descending access count.
    """
    # Only accesses from the last 14 days count towards the ranking.
    start_str = (datetime.now() - timedelta(days=14)).isoformat()

    # Top files by access count; the 1000-row cap bounds the work done below.
    query = '''
        SELECT rel_path, COUNT(*) as access_count
        FROM file_access_log
        WHERE timestamp >= ? AND mime LIKE 'audio/%'
        GROUP BY rel_path
        ORDER BY access_count DESC
        LIMIT 1000
    '''
    with log_db:
        rows = log_db.execute(query, (start_str,)).fetchall()

    # NOTE(review): this is a plain string-prefix test, so a folder key 'abc'
    # also admits 'abcdef/...'. Confirm the key format (e.g. trailing
    # separator) makes that impossible.
    allowed_basefolders = list(session['folders'].keys())
    max_entries = 20

    filelist = []
    for rel_path, _access_count in rows:
        if not any(rel_path.startswith(folder) for folder in allowed_basefolders):
            continue
        if extract_structure_from_string(rel_path)[0] != category:
            continue
        # The existence check is the slow step, so it runs last and only until
        # the list is full. Applying the limit after this check means files
        # that have been removed from disk do not use up top-list slots.
        if not os.path.exists(os.path.join(BASE_DIR, rel_path)):
            continue
        filelist.append({
            'name': os.path.basename(rel_path),
            'path': rel_path,
            'file_type': 'music'
        })
        if len(filelist) >= max_entries:
            break
    return filelist