import sqlite3
from flask import render_template, request, session
from datetime import datetime, timedelta, timezone
import geoip2.database
from auth import require_secret
from collections import defaultdict
import json
import os
import auth

# In-memory cache of recent accesses; each entry is
# [iso_timestamp, rel_path, filesize, mime, "city, country", user_agent, device_id, cached]
file_access_temp = []

app_config = auth.return_app_config()

# Create a single global connection to SQLite
log_db = sqlite3.connect("access_log.db", check_same_thread=False)
search_db = sqlite3.connect("search.db", check_same_thread=False)

# geo location
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')

def init_log_db():
    """Create the file_access_log table if it doesn't already exist."""
    with log_db:
        log_db.execute('''
            CREATE TABLE IF NOT EXISTS file_access_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                rel_path TEXT,
                filesize INTEGER,
                mime TEXT,
                city TEXT,
                country TEXT,
                user_agent TEXT,
                device_id TEXT,
                cached BOOLEAN
            )
        ''')

init_log_db()

def lookup_location(ip):
    """Resolve an IP address to (city, country) via the GeoLite2 database."""
    try:
        response = geoReader.city(ip)
        country = response.country.name if response.country.name else "Unknown"
        city = response.city.name if response.city.name else "Unknown"
        return city, country
    except Exception:
        return "Unknown", "Unknown"

def get_device_type(user_agent):
    """Classify device type based on user agent string."""
    if 'Android' in user_agent:
        return 'Android'
    elif 'iPhone' in user_agent or 'iPad' in user_agent:
        return 'iOS'
    elif 'Windows' in user_agent:
        return 'Windows'
    elif 'Macintosh' in user_agent or 'Mac OS' in user_agent:
        return 'MacOS'
    elif 'Linux' in user_agent:
        return 'Linux'
    else:
        return 'Other'

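# Illustrative examples (not exhaustive) of how the substring checks above
# classify a few user-agent strings:
#   get_device_type("Mozilla/5.0 (Linux; Android 14; Pixel 8)")   -> 'Android'
#   get_device_type("Mozilla/5.0 (iPhone; CPU iPhone OS 17_0)")   -> 'iOS'
#   get_device_type("curl/8.5.0")                                 -> 'Other'
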
def parse_timestamp(ts_str):
    """Parse an ISO 8601 timestamp, falling back to manual timezone handling."""
    try:
        # Try the normal ISO parsing.
        return datetime.fromisoformat(ts_str)
    except ValueError as e:
        if 'unconverted data remains' in str(e):
            # Find where the timezone starts. Look for a '+' or '-' after the time.
            for sign in ['+', '-']:
                pos = ts_str.rfind(sign)
                # Only treat the sign as a timezone marker if it appears after the
                # date part; otherwise '-' would match the date separators.
                if pos > 10:
                    # Assume the base part is up to pos and then the tz part
                    base = ts_str[:pos]
                    tz_part = ts_str[pos:]
                    # Remove any colon from the tz part to help with parsing.
                    tz_clean = tz_part.replace(':', '')
                    # Try parsing the base part. It might or might not have fractional seconds.
                    try:
                        dt = datetime.fromisoformat(base)
                    except ValueError:
                        dt = datetime.strptime(base, '%Y-%m-%dT%H:%M:%S')
                    # Extract hours and minutes from the tz portion.
                    try:
                        offset_hours = int(tz_clean[1:3])
                        offset_minutes = int(tz_clean[3:5])
                    except Exception:
                        raise ValueError(f"Unable to parse timezone from {ts_str}")
                    offset = timedelta(hours=offset_hours, minutes=offset_minutes)
                    if tz_clean[0] == '-':
                        offset = -offset
                    # Return a timezone-aware datetime.
                    return dt.replace(tzinfo=timezone(offset))
        # If it's some other ValueError, re-raise it.
        raise

def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
    """Insert a file access record into the database and prune in-memory entries older than 10 minutes."""
    global file_access_temp
    # Create a timezone-aware timestamp (local time with offset)
    timestamp = datetime.now(timezone.utc).astimezone()
    iso_ts = timestamp.isoformat()

    # Convert the IP address to a location
    city, country = lookup_location(ip_address)

    with log_db:
        log_db.execute('''
            INSERT INTO file_access_log
            (timestamp, rel_path, filesize, mime, city, country, user_agent, device_id, cached)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (iso_ts, rel_path, filesize, mime, city, country, user_agent, device_id, cached))

    # Remove entries older than 10 minutes using our robust parser.
    cutoff_time = datetime.now(timezone.utc).astimezone() - timedelta(minutes=10)
    file_access_temp[:] = [
        entry for entry in file_access_temp
        if parse_timestamp(entry[0]) >= cutoff_time
    ]

    # Add the new entry at the beginning of the list
    file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, f"{city}, {country}", user_agent, device_id, cached])
    return True

def return_file_access():
    """Return recent file access logs from memory (the last 10 minutes)."""
    global file_access_temp
    if file_access_temp:
        # Create a timezone-aware cutoff time
        cutoff_time = datetime.now(timezone.utc).astimezone() - timedelta(minutes=10)
        # Only keep entries with timestamps greater than or equal to cutoff_time
        file_access_temp[:] = [
            entry for entry in file_access_temp
            if datetime.fromisoformat(entry[0]) >= cutoff_time
        ]
        return file_access_temp
    else:
        return []

def songs_dashboard():
    # — SESSION & PARAM HANDLING —
    if 'songs_dashboard_timeframe' not in session:
        session['songs_dashboard_timeframe'] = "30"
    timeframe_param = request.args.get("timeframe", session['songs_dashboard_timeframe'])
    session['songs_dashboard_timeframe'] = timeframe_param

    if 'songs_dashboard_category' not in session:
        session['songs_dashboard_category'] = "Gemeinsamer Gesang"
    category = request.args.get("category", session['songs_dashboard_category'])
    session['songs_dashboard_category'] = category

    if 'songs_dashboard_site' not in session:
        session['songs_dashboard_site'] = "Speyer"
    site = request.args.get("site", session['songs_dashboard_site'])
    session['songs_dashboard_site'] = site

    # — DETERMINE CUTOFF + TODAY STRINGS —
    now = datetime.now()
    params = [category, site]
    date_clauses = []
    if timeframe_param != "all":
        cutoff = now - timedelta(days=int(timeframe_param))
        date_clauses.append("performance_date >= ?")
        params.append(cutoff.strftime("%Y-%m-%d"))
        # filter out any future-dated rows at the DB level
        date_clauses.append("performance_date <= ?")
        params.append(now.strftime("%Y-%m-%d"))

    where_sql = " AND ".join(["category = ?", "site = ?"] + date_clauses)

    cursor = search_db.cursor()
    cursor.execute(
        f"SELECT titel, performance_date FROM files WHERE {where_sql}",
        params
    )
    rows = cursor.fetchall()

    # — AGGREGATE COUNTS + LAST-PERFORMED, WITH ERROR LOGGING —
    performance_counts = defaultdict(int)
    last_performed_dates = {}

    for titel, perf_date_str in rows:
        if not perf_date_str:
            continue

        perf_date_str = perf_date_str.strip()
        try:
            perf_date = datetime.strptime(perf_date_str, "%Y-%m-%d")
        except ValueError:
            print(f"[songs_dashboard] bad date format for “{titel}”: “{perf_date_str}”")
            continue

        performance_counts[titel] += 1

        prev = last_performed_dates.get(titel)
        if prev is None or perf_date > prev:
            last_performed_dates[titel] = perf_date

    # — BUILD LIST FOR TEMPLATE —
    performance_data = []
    for titel, count in performance_counts.items():
        last_str = last_performed_dates[titel].strftime("%d.%m.%Y")
        performance_data.append({
            "titel": titel,
            "count": count,
            "last_performed": last_str
        })

    performance_data.sort(key=lambda x: x["count"], reverse=True)

    # — RENDER —
    return render_template(
        'songs_dashboard.html',
        timeframe=timeframe_param,
        performance_data=performance_data,
        site=site,
        category=category,
        admin_enabled=auth.is_admin(),
        title_short=app_config.get('TITLE_SHORT', 'Default Title'),
        title_long=app_config.get('TITLE_LONG', 'Default Title'),
    )

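# Illustrative shape of the query songs_dashboard builds for a non-"all"
# timeframe (placeholder values, not real data):
#   WHERE category = ? AND site = ? AND performance_date >= ? AND performance_date <= ?
#   params = [category, site, "<cutoff YYYY-MM-DD>", "<today YYYY-MM-DD>"]
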
@require_secret
def connections():
    title_short = app_config.get('TITLE_SHORT', 'Default Title')
    title_long = app_config.get('TITLE_LONG', 'Default Title')
    return render_template('connections.html',
                           admin_enabled=auth.is_admin(),
                           title_short=title_short,
                           title_long=title_long)

@require_secret
def dashboard():
    if 'filetype' not in session:
        session['filetype'] = 'audio'
    if 'timeframe' not in session:
        session['timeframe'] = 'last24hours'
    session['filetype'] = request.args.get('filetype', session['filetype'])
    session['timeframe'] = request.args.get('timeframe', session['timeframe'])

    now = datetime.now()

    # default filetype if not found
    filetype = 'other'
    # Some simplistic sets to decide how we match the MIME type
    audio_list = ['mp3', 'wav', 'ton', 'audio']
    image_list = ['jpg', 'jpeg', 'image', 'photo', 'bild', 'foto']
    video_list = ['mp4', 'mov', 'wmv', 'avi', 'film', 'video']

    if session['filetype'].lower() in audio_list:
        filetype = 'audio/'
    elif session['filetype'].lower() in image_list:
        filetype = 'image/'
    elif session['filetype'].lower() in video_list:
        filetype = 'video/'

    # Determine start time based on session['timeframe']
    if session['timeframe'] == 'last24hours':
        start_dt = now - timedelta(hours=24)
    elif session['timeframe'] == '7days':
        start_dt = now - timedelta(days=7)
    elif session['timeframe'] == '30days':
        start_dt = now - timedelta(days=30)
    elif session['timeframe'] == '365days':
        start_dt = now - timedelta(days=365)
    else:
        start_dt = now - timedelta(hours=24)

    # We'll compare the textual timestamp (ISO 8601).
    start_str = start_dt.isoformat()

    # Build the SQL filter
    if filetype == 'other':
        # Exclude audio, image, video
        filetype_filter_sql = (
            "AND mime NOT LIKE 'audio/%' "
            "AND mime NOT LIKE 'image/%' "
            "AND mime NOT LIKE 'video/%' "
        )
        params_for_filter = (start_str,)
    else:
        # Filter for mimes that start with the given type
        filetype_filter_sql = "AND mime LIKE ?"
        params_for_filter = (start_str, filetype + '%')

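    # For illustration: with session['filetype'] == 'audio' the filter built above
    # is "AND mime LIKE ?" with params_for_filter == (start_str, 'audio/%'); for an
    # unrecognized type it falls back to the NOT LIKE exclusions with only (start_str,).
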
    # 1. Top files by access count
    query = f'''
        SELECT rel_path, COUNT(*) as access_count
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY access_count DESC
        LIMIT 20
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        rows = cursor.fetchall()

    # 2. Distinct device trend
    # We'll group by hour for "last24hours", by day for "7days"/"30days", by month for "365days"
    if session['timeframe'] == 'last24hours':
        # Group by hour via strftime -> YYYY-MM-DDTHH:00:00Z
        query = f'''
            SELECT strftime('%Y-%m-%dT%H:00:00Z', replace(timestamp, 'T', ' ')) AS bucket, COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] in ('7days', '30days'):
        # Group by day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] == '365days':
        # Group by month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket, COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        distinct_device_data_rows = cursor.fetchall()
    distinct_device_data = [
        dict(bucket=r[0], count=r[1]) for r in distinct_device_data_rows
    ]

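    # For reference: the hourly bucket relies on SQLite being able to parse the
    # stored ISO timestamp (strftime returns NULL otherwise); the daily and
    # monthly buckets are plain substrings, "YYYY-MM-DD" and "YYYY-MM".
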
    # 3. Download trend
    # We'll group by hour for "last24hours", by day for "7days"/"30days", by month for "365days".
    if session['timeframe'] == 'last24hours':
        # Hour bucket via strftime -> YYYY-MM-DDTHH:00:00Z
        query = f'''
            SELECT strftime('%Y-%m-%dT%H:00:00Z', replace(timestamp, 'T', ' ')) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] in ('7days', '30days'):
        # Day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif session['timeframe'] == '365days':
        # Month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ? {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        timeframe_data_rows = cursor.fetchall()
    timeframe_data = [
        dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
    ]

    # 4. User agent distribution: count each user_agent once per device_id
    query = f'''
        SELECT user_agent, COUNT(DISTINCT device_id) AS count
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
        GROUP BY user_agent
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        raw_user_agents = cursor.fetchall()
    device_counts = {}
    for (ua, cnt) in raw_user_agents:
        device = get_device_type(ua)
        device_counts[device] = device_counts.get(device, 0) + cnt
    user_agent_data = [
        dict(device=d, count=c) for d, c in device_counts.items()
    ]

    # 5. Parent folder distribution
    query = f'''
        SELECT rel_path, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY count DESC
    '''
    folder_data_dict = {}
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        for (rp, c) in cursor.fetchall():
            if '/' in rp:
                parent_folder = rp.rsplit('/', 1)[0]
            else:
                parent_folder = "Root"
            folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c
    folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
    folder_data.sort(key=lambda x: x['count'], reverse=True)
    folder_data = folder_data[:10]

    # 6. Aggregate locations with counts
    query = f'''
        SELECT city, country, COUNT(*) as count
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
        GROUP BY city, country
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        locations = cursor.fetchall()

    # 7. Summary stats
    # total_accesses
    query = f'''
        SELECT COUNT(*)
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        total_accesses = cursor.fetchone()[0]

    # unique_files
    query = f'''
        SELECT COUNT(DISTINCT rel_path)
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_files = cursor.fetchone()[0]

    # unique_user
    query = f'''
        SELECT COUNT(DISTINCT device_id)
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_user = cursor.fetchone()[0]

    # Percentage of cached calls
    query = f'''
        SELECT (CAST(SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*)) * 100
        FROM file_access_log
        WHERE timestamp >= ? {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        cached_percentage = cursor.fetchone()[0]

    if cached_percentage is not None:
        cached_percentage = f"{cached_percentage:.2f}"

    # 8. Process location data
    location_data_dict = {}
    for (city, country, cnt) in locations:
        key = (city, country)
        location_data_dict[key] = location_data_dict.get(key, 0) + cnt

    location_data = [
        dict(city=k[0], country=k[1], count=v)
        for k, v in location_data_dict.items()
    ]
    location_data.sort(key=lambda x: x['count'], reverse=True)
    location_data = location_data[:20]

    # Convert the top-files rows to a list of dictionaries
    rows = [dict(rel_path=r[0], access_count=r[1]) for r in rows]

    title_short = app_config.get('TITLE_SHORT', 'Default Title')
    title_long = app_config.get('TITLE_LONG', 'Default Title')

    return render_template(
        "dashboard.html",
        timeframe=session['timeframe'],
        rows=rows,
        distinct_device_data=distinct_device_data,
        user_agent_data=user_agent_data,
        folder_data=folder_data,
        location_data=location_data,
        total_accesses=total_accesses,
        unique_files=unique_files,
        unique_user=unique_user,
        cached_percentage=cached_percentage,
        timeframe_data=timeframe_data,
        admin_enabled=auth.is_admin(),
        title_short=title_short,
        title_long=title_long
    )

def export_to_excel():
    """Export search_db to an Excel file and store it locally."""
    import pandas as pd

    # Query all data from the search_db
    query = "SELECT * FROM files"
    cursor = search_db.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()

    # Get column names from the cursor description
    column_names = [description[0] for description in cursor.description]

    # Create a DataFrame and save it to an Excel file
    df = pd.DataFrame(rows, columns=column_names)
    df = df.drop(columns=['transcript'], errors='ignore')  # Drop the 'transcript' column if it exists
    df.to_excel("search_db.xlsx", index=False)

    # Close the cursor
    cursor.close()

if __name__ == "__main__":
    print("Running as a standalone script.")
    export_to_excel()
    print("Exported search_db to search_db.xlsx")