add song analytics
commit 04bb218ac7
parent a6e29d81ef

analytics.py (70 lines changed)
@@ -3,6 +3,8 @@ from flask import render_template, request, session
from datetime import datetime, timedelta, timezone
import geoip2.database
from auth import require_secret
from collections import defaultdict
import json
import os

file_access_temp = []
@@ -12,6 +14,7 @@ DB_NAME = 'access_log.db'

# Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
search_db = sqlite3.connect("search.db", check_same_thread=False)

# geo location
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
@@ -137,6 +140,45 @@ def return_file_access():
    else:
        return []

def songs_dashboard():
    days_param = request.args.get("days", "30")
    site = str(request.args.get("site", "Speyer"))

    # Determine cutoff_date based on the days parameter
    if days_param == "all":
        cutoff_date = None  # No date filtering when analyzing all time
        timeframe = "all"   # Pass the string to the template if needed
    else:
        timeframe = int(days_param)
        now = datetime.now()
        cutoff_date = now - timedelta(days=timeframe)

    cursor = search_db.cursor()
    # Query rows with category "Gemeinsamer Gesang"
    query = "SELECT titel, performance_date FROM files WHERE category = ? and site = ?"
    cursor.execute(query, ('Gemeinsamer Gesang', site))
    rows = cursor.fetchall()

    # Group and count performances per titel (only if performance_date is within the timeframe,
    # or count all if cutoff_date is None)
    performance_counts = defaultdict(int)
    for titel, performance_date in rows:
        if performance_date:
            try:
                # Convert date from "dd.mm.yyyy" format
                date_obj = datetime.strptime(performance_date, "%d.%m.%Y")
            except ValueError:
                continue
            # If cutoff_date is None, count all dates; otherwise, filter by cutoff_date.
            if cutoff_date is None or date_obj >= cutoff_date:
                performance_counts[titel] += 1

    # Create a list of tuples: (count, titel), sorted in descending order by count.
    performance_data = [(count, titel) for titel, count in performance_counts.items()]
    performance_data.sort(reverse=True, key=lambda x: x[0])

    return render_template('songs_dashboard.html', timeframe=timeframe, performance_data=performance_data, site=site)

@require_secret
def connections():
    return render_template('connections.html')
@@ -424,3 +466,31 @@ def dashboard():
        cached_percentage=cached_percentage,
        timeframe_data=timeframe_data
    )

def export_to_excel():
    """Export search_db to an Excel file and store it locally."""
    import pandas as pd

    # Query all data from the search_db
    query = "SELECT * FROM files"
    cursor = search_db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()

    # Get column names from the cursor description
    column_names = [description[0] for description in cursor.description]

    # Create a DataFrame and save it to an Excel file
    df = pd.DataFrame(rows, columns=column_names)
    df = df.drop(columns=['transcript'], errors='ignore')  # Drop the 'transcript' column if it exists
    df.to_excel("search_db.xlsx", index=False)

    # Close the cursor
    cursor.close()

if __name__ == "__main__":
    print("Running as a standalone script.")
    export_to_excel()
    print("Exported search_db to search_db.xlsx")

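For reference, a minimal, self-contained sketch of the counting logic that songs_dashboard() applies, run against an in-memory stand-in for search.db (the table layout is reduced and the sample rows are invented purely for illustration):

import sqlite3
from collections import defaultdict
from datetime import datetime, timedelta

# Hypothetical in-memory stand-in for search.db, for illustration only.
db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE files (titel TEXT, category TEXT, site TEXT, performance_date TEXT)")
recent = (datetime.now() - timedelta(days=10)).strftime("%d.%m.%Y")
old = (datetime.now() - timedelta(days=400)).strftime("%d.%m.%Y")
db.executemany("INSERT INTO files VALUES (?, ?, ?, ?)", [
    ("Lied A", "Gemeinsamer Gesang", "Speyer", recent),
    ("Lied A", "Gemeinsamer Gesang", "Speyer", recent),
    ("Lied B", "Gemeinsamer Gesang", "Speyer", old),  # falls outside the 365-day window
])

cutoff_date = datetime.now() - timedelta(days=365)
performance_counts = defaultdict(int)
for titel, performance_date in db.execute(
        "SELECT titel, performance_date FROM files WHERE category = ? and site = ?",
        ("Gemeinsamer Gesang", "Speyer")):
    try:
        date_obj = datetime.strptime(performance_date, "%d.%m.%Y")
    except (TypeError, ValueError):
        continue
    if date_obj >= cutoff_date:
        performance_counts[titel] += 1

performance_data = [(count, titel) for titel, count in performance_counts.items()]
performance_data.sort(reverse=True, key=lambda x: x[0])
print(performance_data)  # [(2, 'Lied A')]
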
app.py (2 lines changed)
@@ -41,6 +41,8 @@ app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])

app.add_url_rule('/songs_dashboard', view_func=a.songs_dashboard)


# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
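One quick way to smoke-test the new route is Flask's built-in test client. The sketch below assumes the Flask instance is importable from app.py as `app` (adjust the import to the actual project layout) and requests the dashboard with the same query parameters the form sends:

# Assumption: `app` is the Flask instance created in app.py.
from app import app

with app.test_client() as client:
    resp = client.get("/songs_dashboard", query_string={"site": "Speyer", "days": "30"})
    print(resp.status_code)                      # expect 200 once the route is registered
    print(b"Gemeinsamer Gesang" in resp.data)    # the rendered template contains the heading
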
index_for_search.py (95 lines changed, Normal file → Executable file)
@@ -1,6 +1,8 @@
import os
import json
import sqlite3
from datetime import datetime
import re

SEARCH_DB_NAME = 'search.db'
ACCESS_LOG_DB_NAME = 'access_log.db'
@@ -25,6 +27,11 @@ def init_db():
            basefolder TEXT,
            filename TEXT,
            filetype TEXT,
            category TEXT,
            titel TEXT,
            name TEXT,
            performance_date TEXT,
            site TEXT,
            transcript TEXT,
            hitcount INTEGER DEFAULT 0,
            UNIQUE(relative_path, filename)
@@ -42,6 +49,31 @@ def init_db():
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    try:
        cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    try:
        cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT")
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    try:
        cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    try:
        cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT")
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    try:
        cursor.execute("ALTER TABLE files ADD COLUMN site TEXT")
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
    search_db.commit()

def scan_dir(directory):
@@ -86,7 +118,6 @@ def updatefileindex():
        # Accumulate scanned file data and keys for this base folder.
        scanned_files = []  # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount)
        current_keys = set()

        for entry in scan_dir(norm_folderpath):
            entry_path = os.path.normpath(entry.path)
            # Get relative part by slicing if possible.
@@ -113,8 +144,66 @@ def updatefileindex():

            # Retrieve the hit count for this file.
            hit_count = get_hit_count(relative_path)

            category, titel, name, performance_date, site = None, None, None, None, None

            if filetype == '.mp3':
                # Determine the site
                if foldername == 'Gottesdienste Speyer':
                    site = 'Speyer'
                elif foldername == 'Gottesdienste Schwegenheim':
                    site = 'Schwegenheim'

-           scanned_files.append((relative_path, foldername, entry.name, filetype, transcript, hit_count))
                # extract category and titel from filename
                filename_ext = os.path.splitext(entry.name)[0]
                left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None)
                if 'predigt' in left_side.lower():
                    category = 'Predigt'
                elif 'wort' in left_side.lower() or 'einladung' in left_side.lower():
                    category = 'Vorwort'
                elif 'chor' in left_side.lower():
                    category = 'Chor'
                elif 'orchester' in left_side.lower():
                    category = 'Orchester'
                elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower():
                    category = 'Gruppenlied'
                elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower():
                    category = 'Gemeinsamer Gesang'
                elif 'gedicht' in left_side.lower():
                    category = 'Gedicht'
                elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower():
                    category = 'Instrumental'
                else:
                    category = None

                if right_side:
                    titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
                    if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht':
                        if not name:  # no titel given, only a name
                            name = titel
                            titel = None
                else:
                    titel = None
                    name = None

                # extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
                date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
                if date_match:
                    date_str = date_match.group(1)
                    # Normalize the date to DD.MM.YYYY format
                    try:
                        date_obj = datetime.strptime(date_str, '%d.%m.%Y')
                        performance_date = date_obj.strftime('%d.%m.%Y')
                    except ValueError:
                        try:
                            date_obj = datetime.strptime(date_str, '%d.%m.%y')
                            performance_date = date_obj.strftime('%d.%m.%Y')
                        except ValueError:
                            performance_date = None
                else:
                    performance_date = None

            scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
            current_keys.add((relative_path, entry.name))

        # Remove database entries for files under this base folder that are no longer on disk.
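The category/titel/name extraction above relies on a filename convention of roughly `Kategorie-Titel-Name.mp3`. A standalone sketch of that parsing, with the keyword list abbreviated and the example filenames invented for illustration:

import os

def parse_filename(filename):
    # Mirrors the split logic above; only a subset of the category keywords is shown.
    stem = os.path.splitext(filename)[0]
    left_side, right_side = stem.split('-', 1) if '-' in stem else (stem, None)

    if 'predigt' in left_side.lower():
        category = 'Predigt'
    elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower():
        category = 'Gemeinsamer Gesang'
    else:
        category = None  # the indexer checks several more keywords here

    titel, name = None, None
    if right_side:
        titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
        # For Predigt/Vorwort/Gedicht a single right-hand part is treated as the speaker's name.
        if category in ('Predigt', 'Vorwort', 'Gedicht') and not name:
            name, titel = titel, None
    return category, titel, name

# Invented examples:
print(parse_filename("Lied-Grosser Gott wir loben dich.mp3"))  # ('Gemeinsamer Gesang', 'Grosser Gott wir loben dich', None)
print(parse_filename("Predigt-Vom Glauben-Mustermann.mp3"))    # ('Predigt', 'Vom Glauben', 'Mustermann')
print(parse_filename("Predigt-Mustermann.mp3"))                # ('Predigt', None, 'Mustermann')
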
@@ -128,7 +217,7 @@ def updatefileindex():

        # Bulk write the scanned files using INSERT OR REPLACE.
        cursor.executemany(
-           "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?)",
+           "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            scanned_files
        )

@@ -4,5 +4,7 @@ pillow
qrcode
diskcache
geoip2
pandas
openpyxl
gunicorn
eventlet

templates/songs_dashboard.html (50 lines, new file)
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Gemeinsamer Gesang</title>
    <!-- Bootstrap CSS -->
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
</head>
<body>
    <div class="container mt-4">
        <h1>Dashboard: Gemeinsamer Gesang</h1>

        <!-- Timeframe selection form -->
        <form method="get" action="{{ url_for('songs_dashboard') }}" class="mb-3" onsubmit="this.submit();">
            <div class="form-group">
                <label for="siteSelect">Gemeindehaus</label>
                <select class="form-control" id="siteSelect" name="site" onchange="this.form.submit();">
                    <option value="Speyer" {% if site == "Speyer" %}selected{% endif %}>Speyer</option>
                    <option value="Schwegenheim" {% if site == "Schwegenheim" %}selected{% endif %}>Schwegenheim</option>
                </select>
                <label for="timeframeSelect">Zeitrahmen (in Tagen)</label>
                <select class="form-control" id="timeframeSelect" name="days" onchange="this.form.submit();">
                    <option value="7" {% if timeframe == 7 %}selected{% endif %}>letzte 7 Tage</option>
                    <option value="30" {% if timeframe == 30 %}selected{% endif %}>letzte 30 Tage</option>
                    <option value="365" {% if timeframe == 365 %}selected{% endif %}>letzte 365 Tage</option>
                    <option value="all" {% if timeframe == "all" %}selected{% endif %}>Alle Jahre</option>
                </select>
            </div>
        </form>

        <!-- Table Output -->
        <table class="table table-bordered">
            <thead>
                <tr>
                    <th>Anzahl</th>
                    <th>Titel</th>
                </tr>
            </thead>
            <tbody>
                {% for count, titel in performance_data %}
                <tr>
                    <td>{{ count }}</td>
                    <td>{{ titel }}</td>
                </tr>
                {% endfor %}
            </tbody>
        </table>
    </div>
</body>
</html>