add song analytics

This commit is contained in:
lelo 2025-04-07 21:06:23 +00:00
parent a6e29d81ef
commit 04bb218ac7
5 changed files with 216 additions and 3 deletions

View File

@ -3,6 +3,8 @@ from flask import render_template, request, session
from datetime import datetime, timedelta, timezone
import geoip2.database
from auth import require_secret
from collections import defaultdict
import json
import os
file_access_temp = []
@ -12,6 +14,7 @@ DB_NAME = 'access_log.db'
# Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
search_db = sqlite3.connect("search.db", check_same_thread=False)
# geo location
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
@ -137,6 +140,45 @@ def return_file_access():
else:
return []
def _resolve_timeframe(days_param, default_days=30):
    """Translate the ``days`` query value into ``(timeframe, cutoff_date)``.

    ``"all"`` disables date filtering and yields ``("all", None)``. Any other
    value must be a positive whole number of days. Malformed, non-positive
    or out-of-range values fall back to ``default_days`` instead of raising,
    so a hand-edited URL cannot crash the view.
    """
    if days_param == "all":
        return "all", None
    try:
        timeframe = int(days_param)
        if timeframe <= 0:
            raise ValueError("timeframe must be positive")
        # timedelta / datetime arithmetic raises OverflowError for huge values.
        return timeframe, datetime.now() - timedelta(days=timeframe)
    except (TypeError, ValueError, OverflowError):
        return default_days, datetime.now() - timedelta(days=default_days)


def _count_performances(rows, cutoff_date):
    """Count rows per titel and return ``[(count, titel), ...]``, highest first.

    ``rows`` is an iterable of ``(titel, performance_date)`` pairs where the
    date is a ``dd.mm.yyyy`` string. Rows with an empty or unparsable date are
    skipped. When ``cutoff_date`` is ``None`` every dated row counts;
    otherwise only rows dated on or after the cutoff do.
    """
    counts = defaultdict(int)
    for titel, performance_date in rows:
        if not performance_date:
            continue
        try:
            date_obj = datetime.strptime(performance_date, "%d.%m.%Y")
        except ValueError:
            continue
        if cutoff_date is None or date_obj >= cutoff_date:
            counts[titel] += 1
    # Stable sort on the count only: titles with equal counts keep the order
    # in which they were first seen.
    return sorted(
        ((count, titel) for titel, count in counts.items()),
        key=lambda item: item[0],
        reverse=True,
    )


# NOTE(review): unlike connections() this view is not wrapped in
# @require_secret -- confirm that unauthenticated access is intended.
def songs_dashboard():
    """Render how often each 'Gemeinsamer Gesang' title was performed.

    Query parameters:
        days: ``"all"`` or a positive number of days to look back
            (default ``"30"``; invalid values fall back to 30).
        site: value matched against the ``site`` column (default ``"Speyer"``).

    Returns:
        The rendered ``songs_dashboard.html`` template, given ``timeframe``
        (an int or ``"all"``), ``performance_data`` (a list of
        ``(count, titel)`` tuples sorted by count, descending) and ``site``.
    """
    days_param = request.args.get("days", "30")
    site = str(request.args.get("site", "Speyer"))
    timeframe, cutoff_date = _resolve_timeframe(days_param)

    # Parameterized query: ``site`` comes straight from the request.
    query = "SELECT titel, performance_date FROM files WHERE category = ? and site = ?"
    cursor = search_db.cursor()
    try:
        cursor.execute(query, ('Gemeinsamer Gesang', site))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails; search_db itself is the
        # shared module-level connection and stays open.
        cursor.close()

    performance_data = _count_performances(rows, cutoff_date)
    return render_template(
        'songs_dashboard.html',
        timeframe=timeframe,
        performance_data=performance_data,
        site=site,
    )
@require_secret
def connections():
    """Render the ``connections.html`` template.

    The view is wrapped by ``require_secret`` (imported from ``auth``);
    presumably that decorator gates access -- confirm in ``auth``.
    """
    template_name = 'connections.html'
    return render_template(template_name)
@ -424,3 +466,31 @@ def dashboard():
cached_percentage=cached_percentage,
timeframe_data=timeframe_data
)
def export_to_excel(output_path="search_db.xlsx"):
    """Export the ``files`` table of search_db to an Excel workbook on disk.

    Args:
        output_path: Destination of the workbook. Defaults to
            ``"search_db.xlsx"`` in the current working directory.

    The ``transcript`` column is left out of the export when present
    (presumably because it holds long free text -- confirm with the author).
    """
    # Imported lazily so the web app does not need pandas at import time.
    import pandas as pd

    cursor = search_db.cursor()
    try:
        cursor.execute("SELECT * FROM files")
        rows = cursor.fetchall()
        # Column names come from the cursor description of the executed query.
        column_names = [description[0] for description in cursor.description]
    finally:
        # Only the cursor is released; search_db is the shared module-level
        # connection and must stay open for the rest of the module.
        cursor.close()

    df = pd.DataFrame(rows, columns=column_names)
    # errors='ignore' keeps this working when the column does not exist.
    df = df.drop(columns=['transcript'], errors='ignore')
    df.to_excel(output_path, index=False)
def _run_standalone_export():
    """Dump the search index to Excel when this module is run as a script."""
    print("Running as a standalone script.")
    export_to_excel()
    print("Exported search_db to search_db.xlsx")


if __name__ == "__main__":
    _run_standalone_export()

2
app.py
View File

@ -41,6 +41,8 @@ app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])
# Song analytics dashboard. GET is Flask's default; it is spelled out here so
# the rule reads the same as the other registrations.
app.add_url_rule('/songs_dashboard', view_func=a.songs_dashboard, methods=['GET'])
# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")

95
index_for_search.py Normal file → Executable file
View File

@ -1,6 +1,8 @@
import os
import json
import sqlite3
from datetime import datetime
import re
SEARCH_DB_NAME = 'search.db'
ACCESS_LOG_DB_NAME = 'access_log.db'
@ -25,6 +27,11 @@ def init_db():
basefolder TEXT,
filename TEXT,
filetype TEXT,
category TEXT,
titel TEXT,
name TEXT,
performance_date TEXT,
site TEXT,
transcript TEXT,
hitcount INTEGER DEFAULT 0,
UNIQUE(relative_path, filename)
@ -42,6 +49,31 @@ def init_db():
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
try:
cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
try:
cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT")
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
try:
cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
try:
cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT")
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
try:
cursor.execute("ALTER TABLE files ADD COLUMN site TEXT")
except sqlite3.OperationalError:
# Likely the column already exists, so we ignore this error.
pass
search_db.commit()
def scan_dir(directory):
@ -86,7 +118,6 @@ def updatefileindex():
# Accumulate scanned file data and keys for this base folder.
scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount)
current_keys = set()
for entry in scan_dir(norm_folderpath):
entry_path = os.path.normpath(entry.path)
# Get relative part by slicing if possible.
@ -114,7 +145,65 @@ def updatefileindex():
# Retrieve the hit count for this file.
hit_count = get_hit_count(relative_path)
scanned_files.append((relative_path, foldername, entry.name, filetype, transcript, hit_count))
category, titel, name, performance_date, site = None, None, None, None, None
if filetype == '.mp3':
# Determine the site
if foldername == 'Gottesdienste Speyer':
site = 'Speyer'
elif foldername == 'Gottesdienste Schwegenheim':
site = 'Schwegenheim'
# extract category and titel from filename
filename_ext = os.path.splitext(entry.name)[0]
left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None)
if 'predigt' in left_side.lower():
category = 'Predigt'
elif 'wort' in left_side.lower() or 'einladung' in left_side.lower():
category = 'Vorwort'
elif 'chor' in left_side.lower():
category = 'Chor'
elif 'orchester' in left_side.lower():
category = 'Orchester'
elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower():
category = 'Gruppenlied'
elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower():
category = 'Gemeinsamer Gesang'
elif 'gedicht' in left_side.lower():
category = 'Gedicht'
elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower():
category = 'Instrumental'
else:
category = None
if right_side:
titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht':
if not name: # kein Titel, nur Name
name = titel
titel = None
else:
titel = None
name = None
# extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
if date_match:
date_str = date_match.group(1)
# Normalize to dd.mm.YYYY format (two-digit years are expanded to four digits)
try:
date_obj = datetime.strptime(date_str, '%d.%m.%Y')
performance_date = date_obj.strftime('%d.%m.%Y')
except ValueError:
try:
date_obj = datetime.strptime(date_str, '%d.%m.%y')
performance_date = date_obj.strftime('%d.%m.%Y')
except ValueError:
performance_date = None
else:
performance_date = None
scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
current_keys.add((relative_path, entry.name))
# Remove database entries for files under this base folder that are no longer on disk.
@ -128,7 +217,7 @@ def updatefileindex():
# Bulk write the scanned files using INSERT OR REPLACE.
cursor.executemany(
"INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?)",
"INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
scanned_files
)

View File

@ -4,5 +4,7 @@ pillow
qrcode
diskcache
geoip2
pandas
openpyxl
gunicorn
eventlet

View File

@ -0,0 +1,50 @@
<!DOCTYPE html>
<!-- Song analytics page: lists how often each "Gemeinsamer Gesang" title was
     performed, filtered by site and timeframe. The visible text is German,
     hence lang="de". -->
<html lang="de">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Gemeinsamer Gesang</title>
  <!-- Bootstrap CSS -->
  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
</head>
<body>
  <div class="container mt-4">
    <h1>Dashboard: Gemeinsamer Gesang</h1>
    <!-- Site and timeframe selection form. There is no submit button: each
         select submits the form as soon as its value changes. -->
    <form method="get" action="{{ url_for('songs_dashboard') }}" class="mb-3">
      <div class="form-group">
        <label for="siteSelect">Gemeindehaus</label>
        <select class="form-control" id="siteSelect" name="site" onchange="this.form.submit();">
          <option value="Speyer" {% if site == "Speyer" %}selected{% endif %}>Speyer</option>
          <option value="Schwegenheim" {% if site == "Schwegenheim" %}selected{% endif %}>Schwegenheim</option>
        </select>
        <label for="timeframeSelect">Zeitrahmen (in Tage)</label>
        <select class="form-control" id="timeframeSelect" name="days" onchange="this.form.submit();">
          <option value="7" {% if timeframe == 7 %}selected{% endif %}>letzte 7 Tage</option>
          <option value="30" {% if timeframe == 30 %}selected{% endif %}>letzte 30 Tage</option>
          <option value="365" {% if timeframe == 365 %}selected{% endif %}>letzte 365 Tage</option>
          <option value="all" {% if timeframe == "all" %}selected{% endif %}>Alle Jahre</option>
        </select>
      </div>
    </form>
    <!-- Table output: one row per title, in the order supplied by the view -->
    <table class="table table-bordered">
      <thead>
        <tr>
          <th>Anzahl</th>
          <th>Titel</th>
        </tr>
      </thead>
      <tbody>
        {% for count, titel in performance_data %}
        <tr>
          <td>{{ count }}</td>
          <td>{{ titel }}</td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</body>
</html>