import os
import re
import sqlite3
from datetime import datetime, timedelta
from typing import Optional

from flask import session

import auth

app_config = auth.return_app_config()
BASE_DIR = os.path.realpath(app_config['BASE_DIR'])
CATEGORY_KEYWORDS = app_config['CATEGORY_KEYWORDS']  # mapping: category -> list of keywords

log_db = sqlite3.connect("access_log.db", check_same_thread=False)
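# Assumption (not defined in this module): access_log.db already contains a
# table `file_access_log` with at least the columns rel_path, mime and a
# sortable ISO-8601 `timestamp`, which generate_top_list() below queries.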

# Precompiled regex to find date-like patterns: either ISO dashed YYYY-MM-DD or dotted X.Y.Z
_DATE_REGEX = re.compile(
    r"("                          # start group
    r"\d{4}-\d{1,2}-\d{1,2}"      # ISO: YYYY-M-D or YYYY-MM-DD
    r"|"                          # or
    r"\d{1,4}\.\d{1,2}\.\d{1,4}"  # dotted: outer groups 1-4 digits, middle group 1-2 digits
    r")"
)
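# Matches e.g. "2023-05-07", "2023-5-7", "07.05.2023" or "05.07.23"; only the
# first date-like substring found is used (see extract_date_from_string below).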

def _try_parse(date_str: str, fmt: str) -> Optional[datetime]:
    """Try to parse date_str with fmt, return datetime or None."""
    try:
        return datetime.strptime(date_str, fmt)
    except ValueError:
        return None

def extract_date_from_string(text: str) -> Optional[str]:
    """
    Extract the first date-like substring from text and return it in ISO format (YYYY-MM-DD).

    Supports:
    - ISO-style dates with dashes (YYYY-M-D or YYYY-MM-DD)
    - Dotted dates (DD.MM.YYYY, YYYY.MM.DD, DD.MM.YY, YY.MM.DD)
    """
    match = _DATE_REGEX.search(text)
    if not match:
        return None

    date_str = match.group(1)

    # 1) ISO dashed format takes priority
    if '-' in date_str:
        dt = _try_parse(date_str, '%Y-%m-%d')
        return dt.strftime('%Y-%m-%d') if dt else None

    # 2) Dotted formats
    parts = date_str.split('.')
    candidates = []

    # Unambiguous: last part has 4 digits → DD.MM.YYYY
    if len(parts) == 3 and len(parts[2]) == 4:
        candidates.append('%d.%m.%Y')
    # Unambiguous: first part has 4 digits → YYYY.MM.DD
    if len(parts) == 3 and len(parts[0]) == 4:
        candidates.append('%Y.%m.%d')
    # Ambiguous two-digit groups: try DD.MM.YY, then YY.MM.DD
    if len(parts) == 3 and all(len(p) == 2 for p in parts):
        candidates.extend(['%d.%m.%y', '%y.%m.%d'])

    # Try each candidate format until one parses
    for fmt in candidates:
        dt = _try_parse(date_str, fmt)
        if dt:
            return dt.strftime('%Y-%m-%d')

    # no valid parse
    return None
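# Illustrative examples (not from the original source):
#   extract_date_from_string("Predigt 2021-3-7 Morgen.mp3")  -> '2021-03-07'
#   extract_date_from_string("07.03.2021-Predigt.mp3")       -> '2021-03-07'
#   extract_date_from_string("no date here")                 -> None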

def extract_structure_from_string(input_string):
    """Derive (category, titel, name) from a file name such as '12-Predigt-Titel-Name.mp3'."""
    path_without_ext = os.path.splitext(input_string)[0]  # remove file extension
    filename = os.path.basename(path_without_ext)          # keep only the file name
    left_side, right_side = filename.split('-', 1) if '-' in filename else (filename, None)

    try:
        int(left_side.strip())
        # first part is only a number (e.g. an index), drop it and split the rest again
        if right_side is not None:
            left_side, right_side = right_side.split('-', 1) if '-' in right_side else (right_side, None)
    except ValueError:
        # first part is not a number, keep the original split
        pass

    # walk the keyword dict in the configured priority order
    category = None
    text = left_side.lower()
    for cat, keywords in CATEGORY_KEYWORDS.items():
        if any(kw in text for kw in keywords):
            category = cat
            break

    if right_side:
        titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
        if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht':
            if not name:  # no title given, only a name
                name = titel
                titel = None
    else:
        titel = None
        name = None

    return category, titel, name
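# Illustrative example (assumes CATEGORY_KEYWORDS contains something like
# {'Predigt': ['predigt'], 'Gedicht': ['gedicht'], 'Vorwort': ['vorwort']}):
#   extract_structure_from_string('42-Predigt-Der gute Hirte-Mueller.mp3')
#   -> ('Predigt', 'Der gute Hirte', 'Mueller')
#   extract_structure_from_string('Gedicht-Abendlied.mp3')
#   -> ('Gedicht', None, 'Abendlied')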

def generate_top_list(category):
    """Return the most accessed audio files of the last 14 days for the given category."""
    now = datetime.now()

    # Only consider accesses within the last 14 days
    start_dt = now - timedelta(days=14)
    start_str = start_dt.isoformat()

    # Only the start timestamp is parameterised; the mime filter is fixed to audio files
    params_for_filter = (start_str,)

    # 1. Top files by access count
    query = '''
        SELECT rel_path, COUNT(*) as access_count
        FROM file_access_log
        WHERE timestamp >= ? AND mime LIKE 'audio/%'
        GROUP BY rel_path
        ORDER BY access_count DESC
        LIMIT 1000
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        rows = cursor.fetchall()

    # Filter by the base folders the current user is allowed to see
    allowed_basefolders = list(session['folders'].keys())
    rows = [
        (rel_path, access_count) for rel_path, access_count in rows
        if any(rel_path.startswith(folder) for folder in allowed_basefolders)
    ]

    # Convert rows to a list of dicts and add the category
    records = [
        {
            'rel_path': rel_path,
            'access_count': access_count,
            'category': extract_structure_from_string(rel_path)[0]
        }
        for rel_path, access_count in rows
    ]
    # Filter by the requested category and keep the top 20
    records = [r for r in records if r['category'] == category][:20]

    # Build the file list and check that each file still exists
    filelist = []
    for record in records:
        rel_path = record['rel_path']
        # ensure the file exists on disk // slow operation, maybe improve later
        if os.path.exists(os.path.join(BASE_DIR, rel_path)):
            filelist.append({
                'name': os.path.basename(rel_path),
                'path': rel_path,
                'file_type': 'music'
            })

    return filelist
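

if __name__ == "__main__":
    # Minimal manual check (sketch, not part of the original module): exercises
    # only the pure date-parsing helper; the Flask- and DB-backed parts need a
    # running app context and an existing access_log.db.
    for sample in ("Predigt 2021-3-7 Morgen.mp3", "07.03.2021-Predigt.mp3", "no date"):
        print(sample, "->", extract_date_from_string(sample))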