add song analytics

2025-04-07 21:06:23 +00:00 · 2025-04-07 21:06:23 +00:00 · 04bb218ac7
commit 04bb218ac7
parent a6e29d81ef
5 changed files with 216 additions and 3 deletions
--- a/analytics.py
+++ b/analytics.py
@ -3,6 +3,8 @@ from flask import render_template, request, session
 from datetime import datetime, timedelta, timezone
 import geoip2.database
 from auth import require_secret
+from collections import defaultdict
+import json
 import os

 file_access_temp = []
@ -12,6 +14,7 @@ DB_NAME = 'access_log.db'

 # Create a single global connection to SQLite
 log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
+search_db = sqlite3.connect("search.db", check_same_thread=False)

 # geo location
 geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
@ -137,6 +140,45 @@ def return_file_access():
    else:
        return []

+def songs_dashboard():
+    """Render the "Gemeinsamer Gesang" dashboard: performance counts per song title.
+
+    Query parameters (read from ``request.args``):
+        days: ``"all"`` for no date filter, otherwise the size of the look-back
+            window in days (default ``"30"``). Values that are not integers, or
+            that are too large for date arithmetic, fall back to 30 days
+            instead of failing the request.
+        site: value matched against ``files.site`` (default ``"Speyer"``).
+
+    Returns:
+        The rendered ``songs_dashboard.html`` template, given ``timeframe``
+        (int or ``"all"``), ``performance_data`` (list of ``(count, titel)``
+        tuples, highest count first) and ``site``.
+    """
+    # NOTE(review): unlike connections(), this view is not wrapped in
+    # @require_secret -- confirm that it is meant to be reachable without it.
+    default_days = 30
+    days_param = request.args.get("days", str(default_days))
+    site = str(request.args.get("site", "Speyer"))
+
+    if days_param == "all":
+        timeframe = "all"
+        cutoff_date = None  # None disables the date filter below
+    else:
+        try:
+            timeframe = int(days_param)
+            cutoff_date = datetime.now() - timedelta(days=timeframe)
+        except (ValueError, OverflowError):
+            # ValueError: non-numeric input. OverflowError: window too large
+            # for timedelta/datetime. The value comes straight from the URL,
+            # so use the default instead of answering with a server error.
+            timeframe = default_days
+            cutoff_date = datetime.now() - timedelta(days=timeframe)
+
+    cursor = search_db.cursor()
+    cursor.execute(
+        "SELECT titel, performance_date FROM files WHERE category = ? and site = ?",
+        ('Gemeinsamer Gesang', site),
+    )
+    rows = cursor.fetchall()
+
+    # Count performances per titel. Rows without a date, or with a date that is
+    # not in "dd.mm.yyyy" form, are skipped.
+    performance_counts = defaultdict(int)
+    for titel, performance_date in rows:
+        if not performance_date:
+            continue
+        try:
+            date_obj = datetime.strptime(performance_date, "%d.%m.%Y")
+        except ValueError:
+            continue
+        if cutoff_date is None or date_obj >= cutoff_date:
+            performance_counts[titel] += 1
+
+    # (count, titel) tuples, most frequently performed first.
+    performance_data = sorted(
+        ((count, titel) for titel, count in performance_counts.items()),
+        key=lambda item: item[0],
+        reverse=True,
+    )
+
+    return render_template(
+        'songs_dashboard.html',
+        timeframe=timeframe,
+        performance_data=performance_data,
+        site=site,
+    )
+
@require_secret
 def connections():
    return render_template('connections.html')
@ -424,3 +466,31 @@ def dashboard():
        cached_percentage=cached_percentage,
        timeframe_data=timeframe_data
    )
+
+def export_to_excel():
+    """Export the ``files`` table of search_db to ``search_db.xlsx``.
+
+    Every column except ``transcript`` (dropped when present) is written to an
+    Excel workbook at the relative path ``search_db.xlsx``. Only the cursor is
+    closed afterwards; the shared ``search_db`` connection stays open.
+    """
+    # Local import -- presumably so importing this module does not require
+    # pandas unless an export is actually run.
+    import pandas as pd
+
+    cursor = search_db.cursor()
+    try:
+        cursor.execute("SELECT * FROM files")
+        rows = cursor.fetchall()
+        # Column names come from the cursor description of the SELECT.
+        column_names = [description[0] for description in cursor.description]
+    finally:
+        # Release the cursor even if the query fails.
+        cursor.close()
+
+    df = pd.DataFrame(rows, columns=column_names)
+    # Drop the 'transcript' column if it exists.
+    df = df.drop(columns=['transcript'], errors='ignore')
+    df.to_excel("search_db.xlsx", index=False)
+
+# When this module is executed directly (not imported), export search_db
+# to search_db.xlsx and report progress on stdout.
+if __name__ == "__main__":
+    print("Running as a standalone script.")
+    export_to_excel()
+    print("Exported search_db to search_db.xlsx")
+    
+    
--- a/app.py
+++ b/app.py
@ -41,6 +41,8 @@ app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'
 app.add_url_rule('/search', view_func=search.search, methods=['GET'])
 app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])

+app.add_url_rule('/songs_dashboard', view_func=a.songs_dashboard)
+

 # Grab the HOST_RULE environment variable
 host_rule = os.getenv("HOST_RULE", "")
--- a/index_for_search.py
+++ b/index_for_search.py
@ -1,6 +1,8 @@
 import os
 import json
 import sqlite3
+from datetime import datetime
+import re

 SEARCH_DB_NAME = 'search.db'
 ACCESS_LOG_DB_NAME = 'access_log.db'
@ -25,6 +27,11 @@ def init_db():
            basefolder TEXT,
            filename TEXT,
            filetype TEXT,
+            category TEXT,
+            titel TEXT,
+            name TEXT,
+            performance_date TEXT,
+            site TEXT,
            transcript TEXT,
            hitcount INTEGER DEFAULT 0,
            UNIQUE(relative_path, filename)
@ -42,6 +49,31 @@ def init_db():
    except sqlite3.OperationalError:
        # Likely the column already exists, so we ignore this error.
        pass
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN site TEXT")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
    search_db.commit()

 def scan_dir(directory):
@ -86,7 +118,6 @@ def updatefileindex():
            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount)
            current_keys = set()
-
            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get relative part by slicing if possible.
@ -114,7 +145,65 @@ def updatefileindex():
                # Retrieve the hit count for this file.
                hit_count = get_hit_count(relative_path)
                
-                scanned_files.append((relative_path, foldername, entry.name, filetype, transcript, hit_count))
+                category, titel, name, performance_date, site = None, None, None, None, None
+                
+                if filetype == '.mp3':
+                    # Determine the site
+                    if foldername == 'Gottesdienste Speyer':
+                        site = 'Speyer'
+                    elif foldername == 'Gottesdienste Schwegenheim':
+                        site = 'Schwegenheim'
+
+                    # extract category and titel from filename
+                    filename_ext = os.path.splitext(entry.name)[0]
+                    left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None)
+                    if 'predigt' in left_side.lower():
+                        category = 'Predigt'
+                    elif 'wort' in left_side.lower() or 'einladung' in left_side.lower():
+                        category = 'Vorwort'
+                    elif 'chor' in left_side.lower():
+                        category = 'Chor'
+                    elif 'orchester' in left_side.lower():
+                        category = 'Orchester'
+                    elif 'gruppenlied' in left_side.lower() or 'jugendlied' in left_side.lower():
+                        category = 'Gruppenlied'
+                    elif 'gemeinsam' in left_side.lower() or 'gesang' in left_side.lower() or 'lied' in left_side.lower():
+                        category = 'Gemeinsamer Gesang'
+                    elif 'gedicht' in left_side.lower():
+                        category = 'Gedicht'
+                    elif 'instrumental' in left_side.lower() or 'musikstück' in left_side.lower():
+                        category = 'Instrumental'
+                    else:
+                        category = None
+
+                    if right_side:
+                        titel, name = right_side.split('-', 1) if '-' in right_side else (right_side, None)
+                        if category == 'Predigt' or category == 'Vorwort' or category == 'Gedicht':
+                            if not name: # kein Titel, nur Name
+                                name = titel
+                                titel = None
+                    else:
+                        titel = None
+                        name = None
+                    
+                    # extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
+                    date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
+                    if date_match:
+                        date_str = date_match.group(1)
+                        # Normalize to dd.mm.yyyy: try a 4-digit year first, then a 2-digit year
+                        try:
+                            date_obj = datetime.strptime(date_str, '%d.%m.%Y')
+                            performance_date = date_obj.strftime('%d.%m.%Y')
+                        except ValueError:
+                            try:
+                                date_obj = datetime.strptime(date_str, '%d.%m.%y')
+                                performance_date = date_obj.strftime('%d.%m.%Y')
+                            except ValueError:
+                                performance_date = None
+                    else:
+                        performance_date = None
+
+                scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
                current_keys.add((relative_path, entry.name))

            # Remove database entries for files under this base folder that are no longer on disk.
@ -128,7 +217,7 @@ def updatefileindex():

            # Bulk write the scanned files using INSERT OR REPLACE.
            cursor.executemany(
-                "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?)",
+                "INSERT OR REPLACE INTO files (relative_path, basefolder, filename, filetype, category, titel, name, performance_date, site, transcript, hitcount) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                scanned_files
            )

--- a/requirements.txt
+++ b/requirements.txt
@ -4,5 +4,7 @@ pillow
 qrcode
 diskcache
 geoip2
+pandas
+openpyxl
 gunicorn
 eventlet
--- a/templates/songs_dashboard.html
+++ b/templates/songs_dashboard.html
@ -0,0 +1,50 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+  <meta charset="UTF-8">
+  <title>Gemeinsamer Gesang</title>
+  <!-- Bootstrap CSS -->
+  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
+</head>
+<body>
+<div class="container mt-4">
+  <h1>Dashboard: Gemeinsamer Gesang</h1>
+
+  <!-- Site and timeframe selection; changing either select re-submits the form via GET -->
+  <form method="get" action="{{ url_for('songs_dashboard') }}" class="mb-3">
+    <div class="form-group">
+      <label for="siteSelect">Gemeindehaus</label>
+      <select class="form-control" id="siteSelect" name="site" onchange="this.form.submit();">
+        <option value="Speyer" {% if site == "Speyer" %}selected{% endif %}>Speyer</option>
+        <option value="Schwegenheim" {% if site == "Schwegenheim" %}selected{% endif %}>Schwegenheim</option>
+      </select>
+      <label for="timeframeSelect">Zeitrahmen (in Tage)</label>
+      <select class="form-control" id="timeframeSelect" name="days" onchange="this.form.submit();">
+        <option value="7" {% if timeframe == 7 %}selected{% endif %}>letzte 7 Tage</option>
+        <option value="30" {% if timeframe == 30 %}selected{% endif %}>letzte 30 Tage</option>
+        <option value="365" {% if timeframe == 365 %}selected{% endif %}>letzte 365 Tage</option>
+        <option value="all" {% if timeframe == "all" %}selected{% endif %}>Alle Jahre</option>
+      </select>
+    </div>
+  </form>
+
+  <!-- Table output: one row per (count, titel) pair from performance_data -->
+  <table class="table table-bordered">
+    <thead>
+      <tr>
+        <th>Anzahl</th>
+        <th>Titel</th>
+      </tr>
+    </thead>
+    <tbody>
+      {% for count, titel in performance_data %}
+        <tr>
+          <td>{{ count }}</td>
+          <td>{{ titel }}</td>
+        </tr>
+      {% endfor %}
+    </tbody>
+  </table>
+</div>
+</body>
+</html>