diff --git a/analytics.py b/analytics.py index fa28f80..145d7b1 100644 --- a/analytics.py +++ b/analytics.py @@ -3,6 +3,8 @@ from flask import render_template, request, session from datetime import datetime, timedelta, timezone import geoip2.database from auth import require_secret +from collections import defaultdict +import json import os file_access_temp = [] @@ -12,6 +14,7 @@ DB_NAME = 'access_log.db' # Create a single global connection to SQLite log_db = sqlite3.connect(DB_NAME, check_same_thread=False) +search_db = sqlite3.connect("search.db", check_same_thread=False) # geo location geoReader = geoip2.database.Reader('GeoLite2-City.mmdb') @@ -137,6 +140,45 @@ def return_file_access(): else: return [] +def songs_dashboard(): + days_param = request.args.get("days", "30") + site = str(request.args.get("site", "Speyer")) + + # Determine cutoff_date based on the days parameter + if days_param == "all": + cutoff_date = None # No date filtering when analyzing all time + timeframe = "all" # Pass the string to the template if needed + else: + timeframe = int(days_param) + now = datetime.now() + cutoff_date = now - timedelta(days=timeframe) + + cursor = search_db.cursor() + # Query rows with category "Gemeinsamer Gesang" + query = "SELECT titel, performance_date FROM files WHERE category = ? and site = ?" + cursor.execute(query, ('Gemeinsamer Gesang', site)) + rows = cursor.fetchall() + + # Group and count performances per titel (only if performance_date is within the timeframe, + # or count all if cutoff_date is None) + performance_counts = defaultdict(int) + for titel, performance_date in rows: + if performance_date: + try: + # Convert date from "dd.mm.yyyy" format + date_obj = datetime.strptime(performance_date, "%d.%m.%Y") + except ValueError: + continue + # If cutoff_date is None, count all dates; otherwise, filter by cutoff_date. 
+ if cutoff_date is None or date_obj >= cutoff_date: + performance_counts[titel] += 1 + + # Create a list of tuples: (count, titel), sorted in descending order by count. + performance_data = [(count, titel) for titel, count in performance_counts.items()] + performance_data.sort(reverse=True, key=lambda x: x[0]) + + return render_template('songs_dashboard.html', timeframe=timeframe, performance_data=performance_data, site=site) + @require_secret def connections(): return render_template('connections.html') @@ -424,3 +466,31 @@ def dashboard(): cached_percentage=cached_percentage, timeframe_data=timeframe_data ) + +def export_to_excel(): + """Export search_db to an Excel file and store it locally.""" + import pandas as pd + + # Query all data from the search_db + query = "SELECT * FROM files" + cursor = search_db.cursor() + cursor.execute(query) + rows = cursor.fetchall() + + # Get column names from the cursor description + column_names = [description[0] for description in cursor.description] + + # Create a DataFrame and save it to an Excel file + df = pd.DataFrame(rows, columns=column_names) + df = df.drop(columns=['transcript'], errors='ignore') # Drop the 'transcript' column if it exists + df.to_excel("search_db.xlsx", index=False) + + # Close the cursor; the shared module-level connection stays open + cursor.close() + +if __name__ == "__main__": + print("Running as a standalone script.") + export_to_excel() + print("Exported search_db to search_db.xlsx") + + \ No newline at end of file diff --git a/app.py b/app.py index 1ab4b54..e678b70 100755 --- a/app.py +++ b/app.py @@ -41,6 +41,8 @@ app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST' app.add_url_rule('/search', view_func=search.search, methods=['GET']) app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST']) +app.add_url_rule('/songs_dashboard', view_func=a.songs_dashboard) + # Grab the HOST_RULE environment variable host_rule = os.getenv("HOST_RULE", "") diff --git
a/index_for_search.py b/index_for_search.py old mode 100644 new mode 100755 index ee3d559..ef9deec --- a/index_for_search.py +++ b/index_for_search.py @@ -1,6 +1,8 @@ import os import json import sqlite3 +from datetime import datetime +import re SEARCH_DB_NAME = 'search.db' ACCESS_LOG_DB_NAME = 'access_log.db' @@ -25,6 +27,11 @@ def init_db(): basefolder TEXT, filename TEXT, filetype TEXT, + category TEXT, + titel TEXT, + name TEXT, + performance_date TEXT, + site TEXT, transcript TEXT, hitcount INTEGER DEFAULT 0, UNIQUE(relative_path, filename) @@ -42,6 +49,31 @@ def init_db(): except sqlite3.OperationalError: # Likely the column already exists, so we ignore this error. pass + try: + cursor.execute("ALTER TABLE files ADD COLUMN category TEXT") + except sqlite3.OperationalError: + # Likely the column already exists, so we ignore this error. + pass + try: + cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT") + except sqlite3.OperationalError: + # Likely the column already exists, so we ignore this error. + pass + try: + cursor.execute("ALTER TABLE files ADD COLUMN name TEXT") + except sqlite3.OperationalError: + # Likely the column already exists, so we ignore this error. + pass + try: + cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT") + except sqlite3.OperationalError: + # Likely the column already exists, so we ignore this error. + pass + try: + cursor.execute("ALTER TABLE files ADD COLUMN site TEXT") + except sqlite3.OperationalError: + # Likely the column already exists, so we ignore this error. + pass search_db.commit() def scan_dir(directory): @@ -86,7 +118,6 @@ def updatefileindex(): # Accumulate scanned file data and keys for this base folder. scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount) current_keys = set() - for entry in scan_dir(norm_folderpath): entry_path = os.path.normpath(entry.path) # Get relative part by slicing if possible. 
@@ -113,8 +144,66 @@ def updatefileindex(): # Retrieve the hit count for this file. hit_count = get_hit_count(relative_path) + + category, titel, name, performance_date, site = None, None, None, None, None + + if filetype == '.mp3': + # Determine the site + if foldername == 'Gottesdienste Speyer': + site = 'Speyer' + elif foldername == 'Gottesdienste Schwegenheim': + site = 'Schwegenheim' - scanned_files.append((relative_path, foldername, entry.name, filetype, transcript, hit_count)) + # extract category and titel from filename + filename_ext = os.path.splitext(entry.name)[0] + left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None) + if 'predigt' in left_side.lower(): + category = 'Predigt' + elif 'wort' in left_side.lower() or 'einladung' in left_side.lower(): + category = 'Vorwort' + elif 'chor' in left_side.lower(): + category = 'Chor' + elif 'orchester' in left_side.lower(): + category = 'Orchester' + elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower(): + category = 'Gruppenlied' + elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower(): + category = 'Gemeinsamer Gesang' + elif 'gedicht' in left_side.lower(): + category = 'Gedicht' + elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower(): + category = 'Instrumental' + else: + category = None + + if right_side: + titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None) + if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht': + if not name: # kein Titel, nur Name + name = titel + titel = None + else: + titel = None + name = None + + # extract the date from path using regex (dd.mm.yyyy or dd.mm.yy) + date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path) + if date_match: + date_str = date_match.group(1) + # Convert to YYYY-MM-DD format + try: + date_obj = datetime.strptime(date_str, '%d.%m.%Y') + performance_date = 
date_obj.strftime('%d.%m.%Y') + except ValueError: + try: + date_obj = datetime.strptime(date_str, '%d.%m.%y') + performance_date = date_obj.strftime('%d.%m.%Y') + except ValueError: + performance_date = None + else: + performance_date = None + + scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count)) current_keys.add((relative_path, entry.name)) # Remove database entries for files under this base folder that are no longer on disk. @@ -128,7 +217,7 @@ def updatefileindex(): # Bulk write the scanned files using INSERT OR REPLACE. cursor.executemany( - "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?)", + "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", scanned_files ) diff --git a/requirements.txt b/requirements.txt index 4de8d00..e46e3fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,7 @@ pillow qrcode diskcache geoip2 +pandas +openpyxl gunicorn eventlet diff --git a/templates/songs_dashboard.html b/templates/songs_dashboard.html new file mode 100644 index 0000000..b202a78 --- /dev/null +++ b/templates/songs_dashboard.html @@ -0,0 +1,50 @@ + + + + + Gemeinsamer Gesang + + + + +
+

Dashboard: Gemeinsamer Gesang

+ + +
+
+ + + + +
+
+ + + + + + + + + + + {% for count, titel in performance_data %} + + + + + {% endfor %} + +
AnzahlTitel
{{ count }}{{ titel }}
+
+ +