add song analytics
This commit is contained in:
parent
a6e29d81ef
commit
04bb218ac7
70
analytics.py
70
analytics.py
@ -3,6 +3,8 @@ from flask import render_template, request, session
|
|||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
import geoip2.database
|
import geoip2.database
|
||||||
from auth import require_secret
|
from auth import require_secret
|
||||||
|
from collections import defaultdict
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
file_access_temp = []
|
file_access_temp = []
|
||||||
@ -12,6 +14,7 @@ DB_NAME = 'access_log.db'
|
|||||||
|
|
||||||
# Create a single global connection to SQLite
|
# Create a single global connection to SQLite
|
||||||
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
|
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
|
||||||
|
search_db = sqlite3.connect("search.db", check_same_thread=False)
|
||||||
|
|
||||||
# geo location
|
# geo location
|
||||||
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
|
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
|
||||||
@ -137,6 +140,45 @@ def return_file_access():
|
|||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def songs_dashboard():
    """Render the performance statistics for the category "Gemeinsamer Gesang".

    Query parameters:
        days: Number of days to look back, or the literal "all" to disable
            date filtering. Defaults to 30. A value that is not a usable
            positive integer also falls back to 30 instead of raising.
        site: Value matched against the ``site`` column. Defaults to "Speyer".

    Returns:
        The rendered ``songs_dashboard.html`` template. It receives
        ``timeframe`` (an int, or the string "all"), ``performance_data``
        (a list of ``(count, titel)`` tuples sorted by descending count)
        and ``site``.
    """
    # NOTE(review): unlike connections(), this view is not wrapped in
    # @require_secret — confirm that it is meant to be publicly reachable.
    default_days = 30
    days_param = request.args.get("days", str(default_days))
    site = str(request.args.get("site", "Speyer"))

    # Determine cutoff_date based on the days parameter.
    if days_param == "all":
        timeframe = "all"  # The template compares against this string.
        cutoff_date = None  # No date filtering when analyzing all time.
    else:
        try:
            timeframe = int(days_param)
            if timeframe <= 0:
                raise ValueError("days must be a positive integer")
            cutoff_date = datetime.now() - timedelta(days=timeframe)
        except (ValueError, OverflowError):
            # Non-numeric, non-positive or out-of-range input comes straight
            # from the query string; use the default instead of failing.
            timeframe = default_days
            cutoff_date = datetime.now() - timedelta(days=default_days)

    # Query rows with category "Gemeinsamer Gesang" for the selected site.
    query = "SELECT titel, performance_date FROM files WHERE category = ? and site = ?"
    cursor = search_db.cursor()
    try:
        cursor.execute(query, ('Gemeinsamer Gesang', site))
        rows = cursor.fetchall()
    finally:
        cursor.close()

    # Count performances per titel. Rows without a date, or with a date that
    # is not in "dd.mm.yyyy" format, are skipped.
    performance_counts = defaultdict(int)
    for titel, performance_date in rows:
        if not performance_date:
            continue
        try:
            date_obj = datetime.strptime(performance_date, "%d.%m.%Y")
        except ValueError:
            continue
        # If cutoff_date is None, count all dates; otherwise filter by it.
        if cutoff_date is None or date_obj >= cutoff_date:
            performance_counts[titel] += 1

    # List of (count, titel) tuples, sorted in descending order by count.
    performance_data = [(count, titel) for titel, count in performance_counts.items()]
    performance_data.sort(reverse=True, key=lambda item: item[0])

    return render_template(
        'songs_dashboard.html',
        timeframe=timeframe,
        performance_data=performance_data,
        site=site,
    )
|
||||||
|
|
||||||
@require_secret
|
@require_secret
|
||||||
def connections():
|
def connections():
|
||||||
return render_template('connections.html')
|
return render_template('connections.html')
|
||||||
@ -424,3 +466,31 @@ def dashboard():
|
|||||||
cached_percentage=cached_percentage,
|
cached_percentage=cached_percentage,
|
||||||
timeframe_data=timeframe_data
|
timeframe_data=timeframe_data
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def export_to_excel(output_path="search_db.xlsx"):
    """Export the ``files`` table of search_db to an Excel file.

    All rows of the table are read, the ``transcript`` column is dropped if
    it exists, and the remaining columns are written without the DataFrame
    index.

    Args:
        output_path: Destination of the Excel file. Defaults to
            "search_db.xlsx", the path that was previously hard-coded.
    """
    # Kept as a function-scope import, as in the original block.
    import pandas as pd

    cursor = search_db.cursor()
    try:
        # Query all data from the files table.
        cursor.execute("SELECT * FROM files")
        rows = cursor.fetchall()
        # Get column names from the cursor description.
        column_names = [description[0] for description in cursor.description]
    finally:
        # Only the cursor is closed; the module-level search_db connection
        # is left open.
        cursor.close()

    # Create a DataFrame and save it to an Excel file.
    df = pd.DataFrame(rows, columns=column_names)
    # errors='ignore' makes this a no-op when there is no 'transcript' column.
    df = df.drop(columns=['transcript'], errors='ignore')
    df.to_excel(output_path, index=False)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Direct invocation: announce the run, write the Excel export with its
    # default settings, then confirm where the file was written.
    print("Running as a standalone script.")
    export_to_excel()
    print("Exported search_db to search_db.xlsx")
|
||||||
|
|
||||||
|
|
||||||
2
app.py
2
app.py
@ -41,6 +41,8 @@ app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'
|
|||||||
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
|
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
|
||||||
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])
|
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])
|
||||||
|
|
||||||
|
app.add_url_rule('/songs_dashboard', view_func=a.songs_dashboard)
|
||||||
|
|
||||||
|
|
||||||
# Grab the HOST_RULE environment variable
|
# Grab the HOST_RULE environment variable
|
||||||
host_rule = os.getenv("HOST_RULE", "")
|
host_rule = os.getenv("HOST_RULE", "")
|
||||||
|
|||||||
95
index_for_search.py
Normal file → Executable file
95
index_for_search.py
Normal file → Executable file
@ -1,6 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
SEARCH_DB_NAME = 'search.db'
|
SEARCH_DB_NAME = 'search.db'
|
||||||
ACCESS_LOG_DB_NAME = 'access_log.db'
|
ACCESS_LOG_DB_NAME = 'access_log.db'
|
||||||
@ -25,6 +27,11 @@ def init_db():
|
|||||||
basefolder TEXT,
|
basefolder TEXT,
|
||||||
filename TEXT,
|
filename TEXT,
|
||||||
filetype TEXT,
|
filetype TEXT,
|
||||||
|
category TEXT,
|
||||||
|
titel TEXT,
|
||||||
|
name TEXT,
|
||||||
|
performance_date TEXT,
|
||||||
|
site TEXT,
|
||||||
transcript TEXT,
|
transcript TEXT,
|
||||||
hitcount INTEGER DEFAULT 0,
|
hitcount INTEGER DEFAULT 0,
|
||||||
UNIQUE(relative_path, filename)
|
UNIQUE(relative_path, filename)
|
||||||
@ -42,6 +49,31 @@ def init_db():
|
|||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
# Likely the column already exists, so we ignore this error.
|
# Likely the column already exists, so we ignore this error.
|
||||||
pass
|
pass
|
||||||
|
try:
|
||||||
|
cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# Likely the column already exists, so we ignore this error.
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# Likely the column already exists, so we ignore this error.
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# Likely the column already exists, so we ignore this error.
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# Likely the column already exists, so we ignore this error.
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
cursor.execute("ALTER TABLE files ADD COLUMN site TEXT")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# Likely the column already exists, so we ignore this error.
|
||||||
|
pass
|
||||||
search_db.commit()
|
search_db.commit()
|
||||||
|
|
||||||
def scan_dir(directory):
|
def scan_dir(directory):
|
||||||
@ -86,7 +118,6 @@ def updatefileindex():
|
|||||||
# Accumulate scanned file data and keys for this base folder.
|
# Accumulate scanned file data and keys for this base folder.
|
||||||
scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount)
|
scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount)
|
||||||
current_keys = set()
|
current_keys = set()
|
||||||
|
|
||||||
for entry in scan_dir(norm_folderpath):
|
for entry in scan_dir(norm_folderpath):
|
||||||
entry_path = os.path.normpath(entry.path)
|
entry_path = os.path.normpath(entry.path)
|
||||||
# Get relative part by slicing if possible.
|
# Get relative part by slicing if possible.
|
||||||
@ -113,8 +144,66 @@ def updatefileindex():
|
|||||||
|
|
||||||
# Retrieve the hit count for this file.
|
# Retrieve the hit count for this file.
|
||||||
hit_count = get_hit_count(relative_path)
|
hit_count = get_hit_count(relative_path)
|
||||||
|
|
||||||
|
category, titel, name, performance_date, site = None, None, None, None, None
|
||||||
|
|
||||||
|
if filetype == '.mp3':
|
||||||
|
# Determine the site
|
||||||
|
if foldername == 'Gottesdienste Speyer':
|
||||||
|
site = 'Speyer'
|
||||||
|
elif foldername == 'Gottesdienste Schwegenheim':
|
||||||
|
site = 'Schwegenheim'
|
||||||
|
|
||||||
scanned_files.append((relative_path, foldername, entry.name, filetype, transcript, hit_count))
|
# extract category and titel from filename
|
||||||
|
filename_ext = os.path.splitext(entry.name)[0]
|
||||||
|
left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None)
|
||||||
|
if 'predigt' in left_side.lower():
|
||||||
|
category = 'Predigt'
|
||||||
|
elif 'wort' in left_side.lower() or 'einladung' in left_side.lower():
|
||||||
|
category = 'Vorwort'
|
||||||
|
elif 'chor' in left_side.lower():
|
||||||
|
category = 'Chor'
|
||||||
|
elif 'orchester' in left_side.lower():
|
||||||
|
category = 'Orchester'
|
||||||
|
elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower():
|
||||||
|
category = 'Gruppenlied'
|
||||||
|
elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower():
|
||||||
|
category = 'Gemeinsamer Gesang'
|
||||||
|
elif 'gedicht' in left_side.lower():
|
||||||
|
category = 'Gedicht'
|
||||||
|
elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower():
|
||||||
|
category = 'Instrumental'
|
||||||
|
else:
|
||||||
|
category = None
|
||||||
|
|
||||||
|
if right_side:
|
||||||
|
titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
|
||||||
|
if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht':
|
||||||
|
if not name: # kein Titel, nur Name
|
||||||
|
name = titel
|
||||||
|
titel = None
|
||||||
|
else:
|
||||||
|
titel = None
|
||||||
|
name = None
|
||||||
|
|
||||||
|
# extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
|
||||||
|
date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
|
||||||
|
if date_match:
|
||||||
|
date_str = date_match.group(1)
|
||||||
|
# Normalize to dd.mm.yyyy format (two-digit years are expanded to four digits)
|
||||||
|
try:
|
||||||
|
date_obj = datetime.strptime(date_str, '%d.%m.%Y')
|
||||||
|
performance_date = date_obj.strftime('%d.%m.%Y')
|
||||||
|
except ValueError:
|
||||||
|
try:
|
||||||
|
date_obj = datetime.strptime(date_str, '%d.%m.%y')
|
||||||
|
performance_date = date_obj.strftime('%d.%m.%Y')
|
||||||
|
except ValueError:
|
||||||
|
performance_date = None
|
||||||
|
else:
|
||||||
|
performance_date = None
|
||||||
|
|
||||||
|
scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
|
||||||
current_keys.add((relative_path, entry.name))
|
current_keys.add((relative_path, entry.name))
|
||||||
|
|
||||||
# Remove database entries for files under this base folder that are no longer on disk.
|
# Remove database entries for files under this base folder that are no longer on disk.
|
||||||
@ -128,7 +217,7 @@ def updatefileindex():
|
|||||||
|
|
||||||
# Bulk write the scanned files using INSERT OR REPLACE.
|
# Bulk write the scanned files using INSERT OR REPLACE.
|
||||||
cursor.executemany(
|
cursor.executemany(
|
||||||
"INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?)",
|
"INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
scanned_files
|
scanned_files
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -4,5 +4,7 @@ pillow
|
|||||||
qrcode
|
qrcode
|
||||||
diskcache
|
diskcache
|
||||||
geoip2
|
geoip2
|
||||||
|
pandas
|
||||||
|
openpyxl
|
||||||
gunicorn
|
gunicorn
|
||||||
eventlet
|
eventlet
|
||||||
|
|||||||
50
templates/songs_dashboard.html
Normal file
50
templates/songs_dashboard.html
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="de">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Gemeinsamer Gesang</title>
  <!-- Bootstrap CSS -->
  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
</head>
<body>
  <div class="container mt-4">
    <h1>Dashboard: Gemeinsamer Gesang</h1>

    <!-- Site and timeframe selection: changing either select resubmits the form as a GET request -->
    <form method="get" action="{{ url_for('songs_dashboard') }}" class="mb-3">
      <div class="form-group">
        <label for="siteSelect">Gemeindehaus</label>
        <select class="form-control" id="siteSelect" name="site" onchange="this.form.submit();">
          <option value="Speyer" {% if site == "Speyer" %}selected{% endif %}>Speyer</option>
          <option value="Schwegenheim" {% if site == "Schwegenheim" %}selected{% endif %}>Schwegenheim</option>
        </select>
        <label for="timeframeSelect">Zeitrahmen (in Tage)</label>
        <select class="form-control" id="timeframeSelect" name="days" onchange="this.form.submit();">
          <option value="7" {% if timeframe == 7 %}selected{% endif %}>letzte 7 Tage</option>
          <option value="30" {% if timeframe == 30 %}selected{% endif %}>letzte 30 Tage</option>
          <option value="365" {% if timeframe == 365 %}selected{% endif %}>letzte 365 Tage</option>
          <option value="all" {% if timeframe == "all" %}selected{% endif %}>Alle Jahre</option>
        </select>
      </div>
      <!-- Fallback for browsers without JavaScript, where the onchange handlers never run -->
      <noscript>
        <button type="submit" class="btn btn-primary">Anzeigen</button>
      </noscript>
    </form>

    <!-- Table output: one row per (count, titel) pair, in the order supplied by the view -->
    <table class="table table-bordered">
      <thead>
        <tr>
          <th>Anzahl</th>
          <th>Titel</th>
        </tr>
      </thead>
      <tbody>
        {% for count, titel in performance_data %}
        <tr>
          <td>{{ count }}</td>
          <td>{{ titel }}</td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</body>
</html>
|
||||||
Loading…
x
Reference in New Issue
Block a user