From b518b61a64b03c1381177666cfe43a0d5cbc945b Mon Sep 17 00:00:00 2001 From: lelo Date: Tue, 20 May 2025 20:46:52 +0000 Subject: [PATCH] use helper function --- index_for_search.py | 48 ++------------------------------------------- 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/index_for_search.py b/index_for_search.py index 1ad7ff5..804336d 100755 --- a/index_for_search.py +++ b/index_for_search.py @@ -3,6 +3,7 @@ import json import sqlite3 from datetime import datetime import re +import helperfunctions as hf SEARCH_DB_NAME = 'search.db' ACCESS_LOG_DB_NAME = 'access_log.db' @@ -70,51 +71,6 @@ def get_hit_count(relative_path): return row["hit_count"] if row else 0 -def extract_date_from_string(string_with_date): - # grab X.Y.Z where X,Y,Z are 1–4 digits - m = re.search(r'(\d{1,4}\.\d{1,2}\.\d{1,4})', string_with_date) - if not m: - return None - - date_str = m.group(1) - parts = date_str.split('.') - - # 1) Unambiguous “last group = YYYY” - if len(parts) == 3 and len(parts[2]) == 4: - fmt = '%d.%m.%Y' - - # 2) Unambiguous “first group = YYYY” - elif len(parts) == 3 and len(parts[0]) == 4: - fmt = '%Y.%m.%d' - - # 3) Ambiguous “XX.XX.XX” → prefer DD.MM.YY, fallback to YY.MM.DD - elif len(parts) == 3 and all(len(p) == 2 for p in parts): - # try last-group-as-year first - try: - dt = datetime.strptime(date_str, '%d.%m.%y') - return dt.strftime('%Y-%m-%d') - except ValueError: - # fallback to first-group-as-year - fmt = '%y.%m.%d' - - else: - # optional: handle ISO with dashes - if '-' in date_str: - try: - dt = datetime.strptime(date_str, '%Y-%m-%d') - return dt.strftime('%Y-%m-%d') - except ValueError: - return None - return None - - # parse with whichever fmt we settled on - try: - dt = datetime.strptime(date_str, fmt) - return dt.strftime('%Y-%m-%d') - except ValueError: - return None - - def updatefileindex(): cursor = search_db.cursor() @@ -215,7 +171,7 @@ def updatefileindex(): titel = None name = None - performance_date = extract_date_from_string(relative_path) + performance_date = hf.extract_date_from_string(relative_path) scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count)) current_keys.add((relative_path, entry.name))