import sqlite3
from flask import render_template, request, session
from datetime import datetime, timedelta, timezone
import geoip2.database
from auth import require_secret
from collections import defaultdict
import pandas as pd
import json
import os

import auth
import helperfunctions as hf

file_access_temp = []
folder_today = []
folder_yesterday = []

app_config = auth.return_app_config()

# Create a single global connection to SQLite
log_db = sqlite3.connect("access_log.db", check_same_thread=False)
search_db = sqlite3.connect("search.db", check_same_thread=False)

# Geo location lookup database
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')


def init_log_db():
    """Create the file_access_log table if it doesn't already exist."""
    with log_db:
        log_db.execute('''
            CREATE TABLE IF NOT EXISTS file_access_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                rel_path TEXT,
                filesize INTEGER,
                mime TEXT,
                city TEXT,
                country TEXT,
                user_agent TEXT,
                device_id TEXT,
                cached BOOLEAN
            )
        ''')


init_log_db()


def lookup_location(ip):
    """Resolve an IP address to (city, country) via the GeoLite2 database."""
    try:
        response = geoReader.city(ip)
        country = response.country.name if response.country.name else "Unknown"
        city = response.city.name if response.city.name else "Unknown"
        return city, country
    except Exception:
        return "Unknown", "Unknown"


def get_device_type(user_agent):
    """Classify device type based on the user agent string."""
    if 'Android' in user_agent:
        return 'Android'
    elif 'iPhone' in user_agent or 'iPad' in user_agent:
        return 'iOS'
    elif 'Windows' in user_agent:
        return 'Windows'
    elif 'Macintosh' in user_agent or 'Mac OS' in user_agent:
        return 'MacOS'
    elif 'Linux' in user_agent:
        return 'Linux'
    else:
        return 'Other'


def parse_timestamp(ts_str):
    """Parse an ISO 8601 timestamp, falling back to manual timezone handling
    for offset formats that datetime.fromisoformat() cannot digest."""
    try:
        # Try the normal ISO parsing first.
        return datetime.fromisoformat(ts_str)
    except ValueError:
        # Manual fallback: split the string into a base part and a timezone offset.
        # Look for a '+' anywhere, or a '-' after the time part (searching the whole
        # string for '-' would hit the date separators).
        t_pos = ts_str.find('T')
        for sign in ['+', '-']:
            pos = ts_str.find(sign, t_pos + 1 if sign == '-' else 0)
            if pos == -1:
                continue
            base = ts_str[:pos]
            tz_part = ts_str[pos:]
            # Remove any colon from the tz part to simplify parsing.
            tz_clean = tz_part.replace(':', '')
            # Parse the base part; it may or may not have fractional seconds.
            try:
                dt = datetime.fromisoformat(base)
            except ValueError:
                dt = datetime.strptime(base, '%Y-%m-%dT%H:%M:%S')
            # Extract hours and minutes from the tz portion.
            try:
                offset_hours = int(tz_clean[1:3])
                offset_minutes = int(tz_clean[3:5])
            except Exception:
                raise ValueError(f"Unable to parse timezone from {ts_str}")
            offset = timedelta(hours=offset_hours, minutes=offset_minutes)
            if tz_clean[0] == '-':
                offset = -offset
            # Return a timezone-aware datetime.
            return dt.replace(tzinfo=timezone(offset))
        # No timezone offset found: re-raise the original error.
        raise


def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
    """Insert a file access record into the database, prune in-memory entries
    older than 10 minutes, and track today's and yesterday's folders separately."""
    global file_access_temp, folder_today, folder_yesterday

    # Create a timezone-aware timestamp
    now = datetime.now(timezone.utc).astimezone()
    iso_ts = now.isoformat()

    # Convert the IP address to a location
    city, country = lookup_location(ip_address)

    with log_db:
        log_db.execute('''
            INSERT INTO file_access_log
                (timestamp, rel_path, filesize, mime, city, country, user_agent, device_id, cached)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (iso_ts, rel_path, filesize, mime, city, country, user_agent, device_id, cached))

    # Prune temp entries older than 10 minutes
    cutoff = now - timedelta(minutes=10)
    file_access_temp[:] = [
        entry for entry in file_access_temp
        if parse_timestamp(entry[0]) >= cutoff
    ]

    # Keep only today's entries in folder_today
    today_str = iso_ts.split('T', 1)[0]
    folder_today[:] = [
        entry for entry in folder_today
        if entry['date_str'] == today_str
    ]

    # Keep only yesterday's entries in folder_yesterday
    yesterday_str = (now - timedelta(days=1)).isoformat().split('T', 1)[0]
    folder_yesterday[:] = [
        entry for entry in folder_yesterday
        if entry['date_str'] == yesterday_str
    ]

    # If this new access is from today, record it.
    # Compare the helper's YYYY-MM-DD string to today's ISO date string.
    date_from_path = hf.extract_date_from_string(rel_path)
    if date_from_path == today_str:
        # Get just the folder part (everything before the final '/')
        folder_path = rel_path.rsplit('/', 1)[0] if '/' in rel_path else rel_path
        # Only append if that folder isn't already in folder_today
        if not any(entry['rel_path'] == folder_path for entry in folder_today):
            folder_today.append({'date_str': today_str, 'rel_path': folder_path})

    # If this new access is from yesterday, record it
    if date_from_path == yesterday_str:
        # Get just the folder part (everything before the final '/')
        folder_path = rel_path.rsplit('/', 1)[0] if '/' in rel_path else rel_path
        # Only append if that folder isn't already in folder_yesterday
        if not any(entry['rel_path'] == folder_path for entry in folder_yesterday):
            folder_yesterday.append({'date_str': yesterday_str, 'rel_path': folder_path})

    # Finally, insert the new access at the top of the temp log
    file_access_temp.insert(0, [
        iso_ts, rel_path, filesize, mime,
        f"{city}, {country}", user_agent, device_id, cached
    ])
    return True


def return_folder_today():
    """
    Return only those folder_today entries whose first segment
    (up to the first '/') is in session['folders'].keys().
    """
    valid_keys = set(session.get('folders', {}).keys())
    filtered = []
    for entry in folder_today:
        # Get the part before the first slash
        top_level = entry['rel_path'].split('/', 1)[0]
        # Include only if this segment is one of the session keys
        if top_level in valid_keys:
            filtered.append(entry)
    return filtered


def return_folder_yesterday():
    """
    Return only those folder_yesterday entries whose first segment
    (up to the first '/') is in session['folders'].keys().
    """
    valid_keys = set(session.get('folders', {}).keys())
    filtered = []
    for entry in folder_yesterday:
        # Get the part before the first slash
        top_level = entry['rel_path'].split('/', 1)[0]
        # Include only if this segment is one of the session keys
        if top_level in valid_keys:
            filtered.append(entry)
    return filtered


def return_file_access():
    """Return recent file access logs from memory (the last 10 minutes)."""
    global file_access_temp
    if file_access_temp:
        # Create a timezone-aware cutoff time
        cutoff_time = datetime.now(timezone.utc).astimezone() - timedelta(minutes=10)
        # Only keep entries with timestamps greater than or equal to cutoff_time
        file_access_temp[:] = [
            entry for entry in file_access_temp
            if datetime.fromisoformat(entry[0]) >= cutoff_time
        ]
        return file_access_temp
    else:
        return []


def songs_dashboard():
    # --- SESSION & PARAM HANDLING ---
    if 'songs_dashboard_timeframe' not in session:
        session['songs_dashboard_timeframe'] = "30"
    timeframe_param = request.args.get("timeframe", session['songs_dashboard_timeframe'])
    session['songs_dashboard_timeframe'] = timeframe_param

    if 'songs_dashboard_category' not in session:
        session['songs_dashboard_category'] = "Gemeinsamer Gesang"
    category = request.args.get("category", session['songs_dashboard_category'])
    session['songs_dashboard_category'] = category

    if 'songs_dashboard_site' not in session:
        session['songs_dashboard_site'] = "Speyer"
    site = request.args.get("site", session['songs_dashboard_site'])
    session['songs_dashboard_site'] = site

    # --- DETERMINE CUTOFF + TODAY STRINGS ---
    now = datetime.now()
    params = [category, site]
    date_clauses = []

    if timeframe_param != "all":
        cutoff = now - timedelta(days=int(timeframe_param))
        date_clauses.append("performance_date >= ?")
        params.append(cutoff.strftime("%Y-%m-%d"))

    # Filter out any future-dated rows at the DB level
    date_clauses.append("performance_date <= ?")
    params.append(now.strftime("%Y-%m-%d"))

    where_sql = " AND ".join(["category = ?", "site = ?"] + date_clauses)

    cursor = search_db.cursor()
    cursor.execute(
        f"SELECT titel, performance_date FROM files WHERE {where_sql}",
        params
    )
    rows = cursor.fetchall()

    # --- AGGREGATE COUNTS + LAST-PERFORMED, WITH ERROR LOGGING ---
    performance_counts = defaultdict(int)
    last_performed_dates = {}

    for titel, perf_date_str in rows:
        if not perf_date_str:
            continue
        perf_date_str = perf_date_str.strip()
        try:
            perf_date = datetime.strptime(perf_date_str, "%Y-%m-%d")
        except ValueError:
            print(f"[songs_dashboard] bad date format for “{titel}”: “{perf_date_str}”")
            continue
        performance_counts[titel] += 1
        prev = last_performed_dates.get(titel)
        if prev is None or perf_date > prev:
            last_performed_dates[titel] = perf_date

    # --- BUILD LIST FOR TEMPLATE ---
    performance_data = []
    for titel, count in performance_counts.items():
        last_str = last_performed_dates[titel].strftime("%d.%m.%Y")
        performance_data.append({
            "titel": titel,
            "count": count,
            "last_performed": last_str
        })
    performance_data.sort(key=lambda x: x["count"], reverse=True)

    # --- RENDER ---
    return render_template(
        'songs_dashboard.html',
        timeframe=timeframe_param,
        performance_data=performance_data,
        site=site,
        category=category,
        admin_enabled=auth.is_admin(),
        title_short=app_config.get('TITLE_SHORT', 'Default Title'),
        title_long=app_config.get('TITLE_LONG', 'Default Title'),
    )


@require_secret
def connections():
    title_short = app_config.get('TITLE_SHORT', 'Default Title')
    title_long = app_config.get('TITLE_LONG', 'Default Title')
    return render_template('connections.html',
                           admin_enabled=auth.is_admin(),
                           title_short=title_short,
                           title_long=title_long)


@require_secret
def dashboard():
    if 'filetype' not in session:
        session['filetype'] = 'audio'
    if 'timeframe' not in session:
        session['timeframe'] = 'last24hours'

    session['filetype'] = request.args.get('filetype', session['filetype'])
    session['timeframe'] = request.args.get('timeframe', session['timeframe'])

    now = datetime.now()

    # Default filetype if no match is found below
    filetype = 'other'

    # Some simplistic sets to decide how we match the MIME type
    audio_list = ['mp3', 'wav', 'ton', 'audio']
    image_list = ['jpg', 'jpeg', 'image', 'photo', 'bild', 'foto']
    video_list = ['mp4', 'mov', 'wmv', 'avi', 'film', 'video']

    if session['filetype'].lower() in audio_list:
        filetype = 'audio/'
    elif session['filetype'].lower() in image_list:
        filetype = 'image/'
    elif session['filetype'].lower() in video_list:
        filetype = 'video/'

    # Determine start time based on session['timeframe']
    if session['timeframe'] == 'last24hours':
        start_dt = now - timedelta(hours=24)
    elif session['timeframe'] == '7days':
        start_dt = now - timedelta(days=7)
    elif session['timeframe'] == '14days':
        start_dt = now - timedelta(days=14)
    elif session['timeframe'] == '30days':
        start_dt = now - timedelta(days=30)
    elif session['timeframe'] == '365days':
        start_dt = now - timedelta(days=365)
    else:
        start_dt = now - timedelta(hours=24)

    # We'll compare the textual timestamp (ISO 8601).
    start_str = start_dt.isoformat()

    # Build the SQL filter
    if filetype == 'other':
        # Exclude audio, image, video
        filetype_filter_sql = (
            "AND mime NOT LIKE 'audio/%' "
            "AND mime NOT LIKE 'image/%' "
            "AND mime NOT LIKE 'video/%' "
        )
        params_for_filter = (start_str,)
    else:
        # Filter for mimes that start with the given type
        filetype_filter_sql = "AND mime LIKE ?"
        params_for_filter = (start_str, filetype + '%')

    # 1. Top files by access count
    # (removed; moved to the file_access() function)

    # 2. Distinct device trend
    # Group by hour for 'last24hours', by day for 7/14/30 days, by month for 365 days
    if session['timeframe'] == 'last24hours':
        # Group by hour via strftime on the normalized timestamp
        query = f'''
            SELECT strftime('%Y-%m-%dT%H:00:00Z', replace(timestamp, 'T', ' ')) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] in ('7days', '14days', '30days'):
        # Group by day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] == '365days':
        # Group by month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''

    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        distinct_device_data_rows = cursor.fetchall()

    distinct_device_data = [
        dict(bucket=r[0], count=r[1]) for r in distinct_device_data_rows
    ]

    # 3. Download trend
    # Group by hour for 'last24hours', by day for 7/14/30 days, by month for 365 days.
    if session['timeframe'] == 'last24hours':
        # Group by hour via strftime on the normalized timestamp
        query = f'''
            SELECT strftime('%Y-%m-%dT%H:00:00Z', replace(timestamp, 'T', ' ')) AS bucket,
                   COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] in ('7days', '14days', '30days'):
        # Group by day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] == '365days':
        # Group by month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket,
                   COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''

    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        timeframe_data_rows = cursor.fetchall()

    timeframe_data = [
        dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
    ]

    # 4. User agent distribution: count each user_agent once per device_id
    query = f'''
        SELECT user_agent, COUNT(DISTINCT device_id) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY user_agent
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        raw_user_agents = cursor.fetchall()

    device_counts = {}
    for (ua, cnt) in raw_user_agents:
        device = get_device_type(ua)
        device_counts[device] = device_counts.get(device, 0) + cnt

    user_agent_data = [
        dict(device=d, count=c) for d, c in device_counts.items()
    ]

    # 5. Parent folder distribution
    query = f'''
        SELECT rel_path, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY count DESC
    '''
    folder_data_dict = {}
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        for (rp, c) in cursor.fetchall():
            if '/' in rp:
                parent_folder = rp.rsplit('/', 1)[0]
            else:
                parent_folder = "Root"
            folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c

    folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
    folder_data.sort(key=lambda x: x['count'], reverse=True)
    folder_data = folder_data[:10]

    # 6. Aggregate locations with counts
    query = f'''
        SELECT city, country, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY city, country
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        locations = cursor.fetchall()

    # 7. Summary stats
    # total_accesses
    query = f'''
        SELECT COUNT(*)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        total_accesses = cursor.fetchone()[0]

    # unique_files
    query = f'''
        SELECT COUNT(DISTINCT rel_path)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_files = cursor.fetchone()[0]

    # unique_user
    query = f'''
        SELECT COUNT(DISTINCT device_id)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_user = cursor.fetchone()[0]

    # Percentage of cached calls
    query = f'''
        SELECT (CAST(SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*)) * 100
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        cached_percentage = cursor.fetchone()[0]
    if cached_percentage is not None:
        cached_percentage = f"{cached_percentage:.2f}"

    # 8. Process location data
    location_data_dict = {}
    for (city, country, cnt) in locations:
        key = (city, country)
        location_data_dict[key] = location_data_dict.get(key, 0) + cnt

    location_data = [
        dict(city=k[0], country=k[1], count=v)
        for k, v in location_data_dict.items()
    ]
    location_data.sort(key=lambda x: x['count'], reverse=True)
    location_data = location_data[:20]

    title_short = app_config.get('TITLE_SHORT', 'Default Title')
    title_long = app_config.get('TITLE_LONG', 'Default Title')

    return render_template(
        "dashboard.html",
        timeframe=session['timeframe'],
        distinct_device_data=distinct_device_data,
        user_agent_data=user_agent_data,
        folder_data=folder_data,
        location_data=location_data,
        total_accesses=total_accesses,
        unique_files=unique_files,
        unique_user=unique_user,
        cached_percentage=cached_percentage,
        timeframe_data=timeframe_data,
        admin_enabled=auth.is_admin(),
        title_short=title_short,
        title_long=title_long
    )


@require_secret
def file_access():
    if 'timeframe' not in session:
        session['timeframe'] = 'last24hours'
    session['timeframe'] = request.args.get('timeframe', session['timeframe'])

    now = datetime.now()
    filetype = 'audio/'

    # Determine start time based on session['timeframe']
    if session['timeframe'] == 'last24hours':
        start_dt = now - timedelta(hours=24)
    elif session['timeframe'] == '7days':
        start_dt = now - timedelta(days=7)
    elif session['timeframe'] == '14days':
        start_dt = now - timedelta(days=14)
    elif session['timeframe'] == '30days':
        start_dt = now - timedelta(days=30)
    elif session['timeframe'] == '365days':
        start_dt = now - timedelta(days=365)
    else:
        start_dt = now - timedelta(hours=24)

    # We'll compare the textual timestamp (ISO 8601).
    start_str = start_dt.isoformat()

    # Filter for mimes that start with the given type
    filetype_filter_sql = "AND mime LIKE ?"
    params_for_filter = (start_str, filetype + '%')

    # 1. Top files by access count
    query = f'''
        SELECT rel_path, COUNT(*) AS access_count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY access_count DESC
        LIMIT 1000
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        rows = cursor.fetchall()

    # Convert rows to a list of dictionaries and add the category
    rows = [
        {
            'rel_path': rel_path,
            'access_count': access_count,
            'category': hf.extract_structure_from_string(rel_path)[0]
        }
        for rel_path, access_count in rows
    ]

    # Get the possible categories from the rows
    categories = sorted({r['category'] for r in rows if r['category'] is not None})
    all_categories = [None] + categories

    top20 = []
    for category in all_categories:
        label = category if category is not None else 'Keine Kategorie gefunden !'
        files = [r for r in rows if r['category'] == category][:20]
        top20.append({
            'category': label,
            'files': files
        })

    title_short = app_config.get('TITLE_SHORT', 'Default Title')
    title_long = app_config.get('TITLE_LONG', 'Default Title')

    return render_template(
        "file_access.html",
        timeframe=session['timeframe'],
        top20=top20,
        admin_enabled=auth.is_admin(),
        title_short=title_short,
        title_long=title_long
    )


def export_to_excel():
    """Export search_db to an Excel file and store it locally."""
    # Query all data from the search_db
    query = "SELECT * FROM files"
    cursor = search_db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()

    # Get column names from the cursor description
    column_names = [description[0] for description in cursor.description]

    # Create a DataFrame and save it to an Excel file
    df = pd.DataFrame(rows, columns=column_names)
    # Drop the 'transcript' column if it exists
    df = df.drop(columns=['transcript'], errors='ignore')
    df.to_excel("search_db.xlsx", index=False)

    # Close the cursor
    cursor.close()


if __name__ == "__main__":
    print("Running as a standalone script.")
    export_to_excel()
    print("Exported search_db to search_db.xlsx")
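

# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of this module's original wiring).
# It shows how the view functions above might be registered on a Flask app and
# how log_file_access() could be called from a download route. The `app`
# instance, the module name `access_log`, MEDIA_ROOT, the `serve_file` route,
# and the hard-coded MIME type are all hypothetical assumptions.
#
#   import os
#   from flask import Flask, request, send_from_directory
#   import access_log  # assumed name of this module (hypothetical)
#
#   app = Flask(__name__)
#   app.secret_key = "change-me"   # required because the views use session
#   MEDIA_ROOT = "/srv/media"      # hypothetical storage root
#
#   @app.route('/files/<path:rel_path>')
#   def serve_file(rel_path):
#       full_path = os.path.join(MEDIA_ROOT, rel_path)
#       access_log.log_file_access(
#           rel_path=rel_path,
#           filesize=os.path.getsize(full_path),
#           mime='audio/mpeg',  # a real handler would detect the MIME type
#           ip_address=request.remote_addr,
#           user_agent=request.headers.get('User-Agent', ''),
#           device_id=request.cookies.get('device_id', 'unknown'),
#           cached=False,
#       )
#       return send_from_directory(MEDIA_ROOT, rel_path)
#
#   # The dashboard views are plain functions, so they can be registered directly:
#   app.add_url_rule('/dashboard', view_func=access_log.dashboard)
#   app.add_url_rule('/file-access', view_func=access_log.file_access)
#   app.add_url_rule('/connections', view_func=access_log.connections)
#   app.add_url_rule('/songs', view_func=access_log.songs_dashboard)
# ---------------------------------------------------------------------------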