lelo 2025-04-01 19:28:02 +02:00
commit 1a55d187fc
12 changed files with 505 additions and 253 deletions

.gitignore (7 changes)

@@ -1,5 +1,12 @@
/venv
/filecache
/filecache_audio
/filecache_image
/filecache_video
/filecache_other
/postgres_data
/instance
/__pycache__
/access_log.db
/folder_config.json
/.env

analytics.py

@@ -1,13 +1,37 @@
from flask import render_template, request, session
import sqlite3
from datetime import datetime, date, timedelta
from flask import render_template, request
from datetime import datetime, timedelta
import geoip2.database
from urllib.parse import urlparse, unquote
from auth import require_secret
import os
file_access_temp = []
# SQLite database file name; change it if needed:
DB_NAME = 'access_log.db'
# Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
def init_log_db():
"""Create the file_access_log table if it doesn't already exist."""
with log_db:
log_db.execute('''
CREATE TABLE IF NOT EXISTS file_access_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
rel_path TEXT,
filesize INTEGER,
mime TEXT,
ip_address TEXT,
user_agent TEXT,
device_id TEXT,
cached BOOLEAN
)
''')
init_log_db()
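A note on the shared connection: sqlite3 allows cross-thread use once check_same_thread=False is set, but it does not make concurrent writes safe by itself. A minimal sketch of the idea, with a hypothetical lock (not part of this module) serializing access:

import sqlite3
import threading

db = sqlite3.connect('access_log.db', check_same_thread=False)
db_lock = threading.Lock()  # hypothetical guard, not in the original module

db.execute('CREATE TABLE IF NOT EXISTS file_access_log (timestamp TEXT, rel_path TEXT)')

def insert_row(iso_ts, rel_path):
    # check_same_thread=False only disables the thread-ownership check;
    # it does not synchronize concurrent access to the connection.
    with db_lock, db:
        db.execute('INSERT INTO file_access_log (timestamp, rel_path) VALUES (?, ?)',
                   (iso_ts, rel_path))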
def lookup_location(ip, reader):
try:
response = reader.city(ip)
@@ -18,7 +42,7 @@ def lookup_location(ip, reader):
return "Unknown", "Unknown"
def get_device_type(user_agent):
"classify device type based on user agent string"
"""Classify device type based on user agent string."""
if 'Android' in user_agent:
return 'Android'
elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -32,52 +56,30 @@ def get_device_type(user_agent):
else:
return 'Other'
def shorten_referrer(url):
segments = [seg for seg in url.split('/') if seg]
segment = segments[-1]
# Decode all percent-encoded characters (like %20, %2F, etc.)
segment_decoded = unquote(segment)
return segment_decoded
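For illustration (the URLs are made up), shorten_referrer keeps only the last non-empty path segment and percent-decodes it; note that callers must guard against empty referrers, since segments[-1] raises IndexError when the URL has no path segments:

>>> shorten_referrer('https://example.com/files/My%20Song.mp3')
'My Song.mp3'
>>> shorten_referrer('https://example.com/Ordner/Unterordner/')
'Unterordner'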
def log_file_access(full_path, ip_address, user_agent, referrer):
"""
Log file access details to a SQLite database.
Records the timestamp, full file path, client IP, user agent, and referrer.
"""
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
"""Insert a file access record into the database."""
global file_access_temp
# Connect to the database (this will create the file if it doesn't exist)
conn = sqlite3.connect('access_log.db')
cursor = conn.cursor()
# Create the table if it doesn't exist
cursor.execute('''
CREATE TABLE IF NOT EXISTS file_access_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
full_path TEXT,
ip_address TEXT,
user_agent TEXT,
referrer TEXT
)
''')
# Gather information from the request
timestamp = datetime.now().isoformat()
timestamp = datetime.now() # a datetime object
# Insert the access record into the database
cursor.execute('''
INSERT INTO file_access_log (timestamp, full_path, ip_address, user_agent, referrer)
VALUES (?, ?, ?, ?, ?)
''', (timestamp, full_path, ip_address, user_agent, referrer))
conn.commit()
conn.close()
file_access_temp.insert(0, [timestamp, full_path, ip_address, user_agent, referrer])
return return_file_access()
# Store the ISO timestamp in the database for easy lexical comparison
iso_ts = timestamp.isoformat()
with log_db:
log_db.execute('''
INSERT INTO file_access_log
(timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached])
return iso_ts
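Storing isoformat() strings makes the WHERE timestamp >= ? comparisons below work, because ISO 8601 timestamps sort lexicographically in the same order as chronologically (for naive timestamps from the same clock). A quick check:

from datetime import datetime, timedelta

now = datetime.now()
earlier = now - timedelta(hours=1)
# String comparison agrees with chronological order:
assert earlier.isoformat() < now.isoformat()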
def return_file_access():
"""Return recent file access logs from memory (the last 10 minutes)."""
global file_access_temp
if len(file_access_temp) > 0:
# Compute the cutoff time (10 minutes ago from now)
if file_access_temp:
cutoff_time = datetime.now() - timedelta(minutes=10)
# Update the list in-place to keep only entries newer than 10 minutes
# Convert each stored timestamp (ISO string) back to datetime
file_access_temp[:] = [
entry for entry in file_access_temp
if datetime.fromisoformat(entry[0]) >= cutoff_time
@@ -92,140 +94,240 @@ def connections():
@require_secret
def dashboard():
filetype_arg = request.args.get('filetype', 'audio')
timeframe = request.args.get('timeframe', 'today')
now = datetime.now()
# Determine which file type we're filtering by.
filetype = 'other'
# Simple keyword lists used to decide how we match the MIME type
audio_list = ['mp3', 'wav', 'audio']
image_list = ['jpg', 'jpeg', 'image', 'photo']
video_list = ['mp4', 'mov', 'wmv', 'avi']
if filetype_arg.lower() in audio_list:
filetype = 'audio/'
elif filetype_arg.lower() in image_list:
filetype = 'image/'
elif filetype_arg.lower() in video_list:
filetype = 'video/'
# Determine start time based on timeframe
if timeframe == 'today':
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
elif timeframe == '7days':
start = now - timedelta(days=7)
start_dt = now - timedelta(days=7)
elif timeframe == '30days':
start = now - timedelta(days=30)
start_dt = now - timedelta(days=30)
elif timeframe == '365days':
start = now - timedelta(days=365)
start_dt = now - timedelta(days=365)
else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
conn = sqlite3.connect('access_log.db')
cursor = conn.cursor()
# We'll compare the textual timestamp (ISO 8601).
start_str = start_dt.isoformat()
# Raw file access counts for the table (top files)
cursor.execute('''
SELECT full_path, COUNT(*) as access_count
# Build the SQL filter
if filetype == 'other':
# Exclude audio, image, video
filetype_filter_sql = (
"AND mime NOT LIKE 'audio/%' "
"AND mime NOT LIKE 'image/%' "
"AND mime NOT LIKE 'video/%' "
)
params_for_filter = (start_str,)
else:
# Filter for mimes that start with the given type
filetype_filter_sql = "AND mime LIKE ?"
params_for_filter = (start_str, filetype + '%')
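The prefix match relies on SQL LIKE with a trailing %. A quick check of both filter shapes against an in-memory database (the rows are made up):

import sqlite3

db = sqlite3.connect(':memory:')
db.execute('CREATE TABLE t (mime TEXT)')
db.executemany('INSERT INTO t VALUES (?)',
               [('audio/mpeg',), ('image/jpeg',), ('application/pdf',)])
# filetype='audio' binds 'audio/%' to a placeholder:
print(db.execute('SELECT mime FROM t WHERE mime LIKE ?', ('audio/%',)).fetchall())
# -> [('audio/mpeg',)]
# filetype='other' inlines the three fixed NOT LIKE clauses:
print(db.execute("SELECT mime FROM t WHERE mime NOT LIKE 'audio/%' "
                 "AND mime NOT LIKE 'image/%' AND mime NOT LIKE 'video/%'").fetchall())
# -> [('application/pdf',)]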
# 1. Top files by access count
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY full_path
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
''', (start.isoformat(),))
rows = cursor.fetchall()
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
rows = cursor.fetchall()
# Daily access trend for a line chart
cursor.execute('''
SELECT date(timestamp) as date, COUNT(*) as count
# 2. Daily access trend (line chart)
# We'll group by day using substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT substr(timestamp, 1, 10) AS date, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY date
ORDER BY date
''', (start.isoformat(),))
daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()]
# Top files for bar chart
cursor.execute('''
SELECT full_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY full_path
ORDER BY access_count DESC
LIMIT 10
''', (start.isoformat(),))
top_files_data = [dict(full_path=row[0], access_count=row[1]) for row in cursor.fetchall()]
# User agent distribution (aggregate by device type)
cursor.execute('''
SELECT user_agent, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY user_agent
ORDER BY count DESC
''', (start.isoformat(),))
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {}
for entry in raw_user_agents:
device = get_device_type(entry['user_agent'])
device_counts[device] = device_counts.get(device, 0) + entry['count']
# Rename to user_agent_data for compatibility with the frontend
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
# Referrer distribution (shorten links)
cursor.execute('''
SELECT referrer, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY referrer
ORDER BY count DESC
LIMIT 10
''', (start.isoformat(),))
referrer_data = []
for row in cursor.fetchall():
raw_ref = row[0]
shortened = shorten_referrer(raw_ref) if raw_ref else "Direct/None"
referrer_data.append(dict(referrer=shortened, count=row[1]))
# Aggregate IP addresses with counts
cursor.execute('''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY ip_address
ORDER BY count DESC
''', (start.isoformat(),))
ip_rows = cursor.fetchall()
# Initialize GeoIP2 reader once for efficiency
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data = {}
for ip, count in ip_rows:
country, city = lookup_location(ip, reader)
key = (country, city)
if key in location_data:
location_data[key] += count
else:
location_data[key] = count
reader.close()
# Convert the dictionary to a list of dictionaries
location_data = [
dict(country=key[0], city=key[1], count=value)
for key, value in location_data.items()
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
daily_rows = cursor.fetchall()
daily_access_data = [
dict(date=r[0], count=r[1]) for r in daily_rows
]
# Sort by count in descending order and take the top 20
# 3. Timeframe-based aggregation
# We'll group by hour if "today", by day if "7days"/"30days", by month if "365days".
if timeframe == 'today':
# Hour: substr(timestamp, 12, 2) -> HH
query = f'''
SELECT substr(timestamp, 12, 2) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
elif timeframe in ('7days', '30days'):
# Day: substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
elif timeframe == '365days':
# Month: substr(timestamp, 1, 7) -> YYYY-MM
query = f'''
SELECT substr(timestamp, 1, 7) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
else:
# Default: group by day
query = f'''
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
timeframe_data_rows = cursor.fetchall()
timeframe_data = [
dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
]
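SQLite's substr is 1-based, so the buckets correspond to fixed slices of the stored ISO string; the same offsets in Python (the timestamp is an example value):

ts = '2025-04-01T19:28:02.123456'
print(ts[0:10])   # '2025-04-01' == substr(timestamp, 1, 10) -> day bucket
print(ts[11:13])  # '19'         == substr(timestamp, 12, 2) -> hour bucket
print(ts[0:7])    # '2025-04'    == substr(timestamp, 1, 7)  -> month bucket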
# 4. User agent distribution
query = f'''
SELECT user_agent, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
raw_user_agents = cursor.fetchall()
device_counts = {}
for (ua, cnt) in raw_user_agents:
device = get_device_type(ua)
device_counts[device] = device_counts.get(device, 0) + cnt
user_agent_data = [
dict(device=d, count=c) for d, c in device_counts.items()
]
# 5. Parent folder distribution
query = f'''
SELECT rel_path, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
'''
folder_data_dict = {}
with log_db:
cursor = log_db.execute(query, params_for_filter)
for (rp, c) in cursor.fetchall():
if '/' in rp:
parent_folder = rp.rsplit('/', 1)[0]
else:
parent_folder = "Root"
folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c
folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]
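The folder grouping reduces each rel_path to its parent directory, with slash-free paths collected under "Root". A standalone sketch (the helper name and sample paths are illustrative):

def parent_folder(rel_path):
    # 'a/b/song.mp3' -> 'a/b'; 'song.mp3' -> 'Root'
    return rel_path.rsplit('/', 1)[0] if '/' in rel_path else 'Root'

print(parent_folder('Liedersammlung/2025/lied.mp3'))  # 'Liedersammlung/2025'
print(parent_folder('lied.mp3'))                      # 'Root'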
# 6. Aggregate IP addresses with counts
query = f'''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
ip_rows = cursor.fetchall()
# 7. Summary stats
# total_accesses
query = f'''
SELECT COUNT(*)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
total_accesses = cursor.fetchone()[0]
# unique_files
query = f'''
SELECT COUNT(DISTINCT rel_path)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_files = cursor.fetchone()[0]
# unique_user
query = f'''
SELECT COUNT(DISTINCT device_id)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_user = cursor.fetchone()[0]
# 8. Process location data with GeoIP2
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data_dict = {}
for (ip_addr, cnt) in ip_rows:
country, city = lookup_location(ip_addr, reader)
key = (country, city)
location_data_dict[key] = location_data_dict.get(key, 0) + cnt
reader.close()
location_data = [
dict(country=k[0], city=k[1], count=v)
for k, v in location_data_dict.items()
]
location_data.sort(key=lambda x: x['count'], reverse=True)
location_data = location_data[:20]
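The (country, city) aggregation is a plain counting loop; collections.Counter expresses the same thing, shown here with made-up lookups:

from collections import Counter

# (country, city) keys as produced by lookup_location, with per-IP counts
ip_locations = [(('Germany', 'Speyer'), 3),
                (('Germany', 'Speyer'), 2),
                (('Unknown', 'Unknown'), 1)]
counts = Counter()
for key, cnt in ip_locations:
    counts[key] += cnt
print(counts.most_common(20))  # top 20 locations, as on the dashboard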
# Summary stats using separate SQL queries
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
total_accesses = cursor.fetchone()[0]
# Convert the top-files rows to a list of dictionaries
rows = [dict(rel_path=r[0], access_count=r[1]) for r in rows]
# Use a separate query to count unique files (distinct full_path values)
cursor.execute('SELECT COUNT(DISTINCT full_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
unique_files = cursor.fetchone()[0]
# Use a separate query to count unique IP addresses
cursor.execute('SELECT COUNT(DISTINCT ip_address) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
unique_ips = cursor.fetchone()[0]
conn.close()
return render_template("dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
top_files_data=top_files_data,
user_agent_data=user_agent_data,
referrer_data=referrer_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_ips=unique_ips)
return render_template(
"dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
user_agent_data=user_agent_data,
folder_data=folder_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data
)

app.py (155 changes)

@@ -4,7 +4,6 @@ from PIL import Image
import io
from functools import wraps
import mimetypes
import sqlite3
from datetime import datetime, date, timedelta
import diskcache
import threading
@@ -15,12 +14,15 @@ import geoip2.database
from functools import lru_cache
from urllib.parse import urlparse, unquote
from werkzeug.middleware.proxy_fix import ProxyFix
import re
import auth
import analytics as a
cache = diskcache.Cache('./filecache', size_limit= 48 * 1024**3) # 48 GB limit
cache_audio = diskcache.Cache('./filecache_audio', size_limit= 48 * 1024**3) # 48 GB limit
cache_image = diskcache.Cache('./filecache_image', size_limit= 48 * 1024**3) # 48 GB limit
cache_video = diskcache.Cache('./filecache_video', size_limit= 48 * 1024**3) # 48 GB limit
cache_other = diskcache.Cache('./filecache_other', size_limit= 48 * 1024**3) # 48 GB limit
app = Flask(__name__)
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1)
@@ -36,7 +38,17 @@ app.add_url_rule('/connections', view_func=a.connections)
app.add_url_rule('/mylinks', view_func=auth.mylinks)
app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'])
socketio = SocketIO(app, async_mode='eventlet')
# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
# Use a regex to extract domain names between backticks in patterns like Host(`something`)
pattern = r"Host\(`([^`]+)`\)"
allowed_domains = re.findall(pattern, host_rule)
socketio = SocketIO(
app,
async_mode='eventlet',
cors_allowed_origins=allowed_domains
)
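Given a Traefik-style rule in HOST_RULE (the value below is illustrative), re.findall extracts the bare domains that SocketIO then accepts as CORS origins:

import re

host_rule = "Host(`app.example.org`) || Host(`alt.example.org`)"  # illustrative value
pattern = r"Host\(`([^`]+)`\)"
print(re.findall(pattern, host_rule))
# -> ['app.example.org', 'alt.example.org']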
background_thread_running = False
# Global variables to track the number of connected clients and the background thread
@@ -76,8 +88,8 @@ def list_directory_contents(directory, subpath):
transcription_exists = os.path.isdir(transcription_dir)
# Define allowed file extensions.
allowed_music_exts = ('.mp3',)
allowed_image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
music_exts = ('.mp3',)
image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
try:
with os.scandir(directory) as it:
@@ -94,27 +106,31 @@ def list_directory_contents(directory, subpath):
directories.append({'name': entry.name, 'path': rel_path.replace(os.sep, '/')})
elif entry.is_file(follow_symlinks=False):
lower_name = entry.name.lower()
if lower_name.endswith(allowed_music_exts) or lower_name.endswith(allowed_image_exts):
rel_path = os.path.join(subpath, entry.name) if subpath else entry.name
if lower_name.endswith(allowed_music_exts):
file_type = 'music'
else:
file_type = 'image'
file_entry = {'name': entry.name, 'path': rel_path.replace(os.sep, '/'), 'file_type': file_type}
# Only check for transcription if it's an audio file.
if file_type == 'music' and transcription_exists:
base_name = os.path.splitext(entry.name)[0]
transcript_filename = base_name + '.md'
transcript_path = os.path.join(transcription_dir, transcript_filename)
if os.path.isfile(transcript_path):
file_entry['has_transcript'] = True
transcript_rel_path = os.path.join(subpath, "Transkription", transcript_filename) if subpath else os.path.join("Transkription", transcript_filename)
file_entry['transcript_url'] = url_for('get_transcript', subpath=transcript_rel_path.replace(os.sep, '/'))
else:
file_entry['has_transcript'] = False
# TODO: implement file-type filtering here
#if lower_name.endswith(music_exts) or lower_name.endswith(image_exts):
rel_path = os.path.join(subpath, entry.name) if subpath else entry.name
if lower_name.endswith(music_exts):
file_type = 'music'
elif lower_name.endswith(image_exts):
file_type = 'image'
else:
file_type = 'other'
file_entry = {'name': entry.name, 'path': rel_path.replace(os.sep, '/'), 'file_type': file_type}
# Only check for transcription if it's an audio file.
if file_type == 'music' and transcription_exists:
base_name = os.path.splitext(entry.name)[0]
transcript_filename = base_name + '.md'
transcript_path = os.path.join(transcription_dir, transcript_filename)
if os.path.isfile(transcript_path):
file_entry['has_transcript'] = True
transcript_rel_path = os.path.join(subpath, "Transkription", transcript_filename) if subpath else os.path.join("Transkription", transcript_filename)
file_entry['transcript_url'] = url_for('get_transcript', subpath=transcript_rel_path.replace(os.sep, '/'))
else:
file_entry['has_transcript'] = False
files.append(file_entry)
else:
file_entry['has_transcript'] = False
files.append(file_entry)
except PermissionError:
pass
@@ -190,43 +206,48 @@ def serve_file(subpath):
mime, _ = mimetypes.guess_type(full_path)
mime = mime or 'application/octet-stream'
if mime and mime.startswith('image/'):
pass # do not log access to images
else:
# HEAD requests come in to initiate server caching.
# only log initial hits, not reloads of further file parts
range_header = request.headers.get('Range')
# only requests starting from the beginning of the file are tracked
# no Range header -> the full file, not just the first byte
if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
referrer = request.headers.get('Referer')
threading.Thread(
target=a.log_file_access,
args=(full_path, ip_address, user_agent, referrer)
).start()
range_header = request.headers.get('Range')
ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
# Check cache first (using diskcache)
response = None
# determine the cache to use based on the file type
if mime and mime.startswith('audio/'):
cache = cache_audio
elif mime and mime.startswith('image/'):
cache = cache_image
elif mime and mime.startswith('video/'):
cache = cache_video
else:
cache = cache_other
# Check if the file is already cached
cached = cache.get(subpath)
if cached:
cached_file_bytes, mime = cached
cached_file = io.BytesIO(cached_file_bytes)
filesize = len(cached_file.getbuffer())
response = send_file(cached_file, mimetype=mime)
else:
if mime and mime.startswith('image/'):
# Image processing branch (with caching)
try:
with Image.open(full_path) as img:
img.thumbnail((1200, 1200))
img_bytes = io.BytesIO()
img.save(img_bytes, format='PNG', quality=85)
img_bytes = img_bytes.getvalue()
cache.set(subpath, (img_bytes, mime))
response = send_file(io.BytesIO(img_bytes), mimetype=mime, conditional=True)
img.thumbnail((1920, 1920))
if img.mode in ("RGBA", "P"):
img = img.convert("RGB")
output_format = 'JPEG'
output_mime = 'image/jpeg'
save_kwargs = {'quality': 85}
img_bytes_io = io.BytesIO()
img.save(img_bytes_io, format=output_format, **save_kwargs)
thumb_bytes = img_bytes_io.getvalue()
filesize = len(thumb_bytes)  # measure after saving; the buffer is empty before img.save()
cache.set(subpath, (thumb_bytes, output_mime))
response = send_file(io.BytesIO(thumb_bytes), mimetype=output_mime, conditional=True)
except Exception as e:
app.logger.error(f"Image processing failed for {subpath}: {e}")
abort(500)
@@ -236,13 +257,29 @@ def serve_file(subpath):
with open(full_path, 'rb') as f:
file_bytes = f.read()
cache.set(subpath, (file_bytes, mime))
response = send_file(io.BytesIO(file_bytes), mimetype=mime, conditional=True)
file_bytes_io = io.BytesIO(file_bytes)
filesize = len(file_bytes_io.getbuffer())
response = send_file(file_bytes_io, mimetype=mime, conditional=True)
except Exception as e:
app.logger.error(f"Failed to read file {subpath}: {e}")
abort(500)
# Set Cache-Control header (browser caching for 1 day)
response.headers['Cache-Control'] = 'public, max-age=86400'
if mime and mime.startswith('audio/mpeg'): # special rules for mp3 files
# HEAD requests come in to initiate server caching; ignore them and only log GET requests.
# Log access if there is no Range header, or if the range starts at byte 0 and is larger than the two-byte probe (bytes=0-1).
if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
logging = True
else:
logging = False
else:
logging = True
if logging:
a.log_file_access(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
return response
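The mp3 rule reads as a small predicate: log GET requests whose Range header is absent, or which start at byte 0 without being the two-byte probe. A sketch with a few checks (the helper name is mine, not the app's):

def should_log(method, range_header):
    # Log full (or from-zero) GET fetches; skip HEAD warm-ups and probes.
    if method != 'GET':
        return False
    if not range_header:
        return True
    return range_header.startswith('bytes=0-') and range_header != 'bytes=0-1'

assert should_log('GET', None)              # full download
assert should_log('GET', 'bytes=0-')        # open-ended range from the start
assert not should_log('GET', 'bytes=0-1')   # two-byte probe
assert not should_log('GET', 'bytes=500-')  # resumed playback
assert not should_log('HEAD', None)         # cache warm-up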
@@ -327,8 +364,11 @@ def query_recent_connections():
{
'timestamp': datetime.strptime(row[0], '%Y-%m-%dT%H:%M:%S.%f').strftime('%d.%m.%Y %H:%M:%S'),
'full_path': row[1],
'ip_address': row[2],
'user_agent': row[3]
'filesize' : row[2],
'mime_typ' : row[3],
'ip_address': row[4],
'user_agent': row[5],
'cached': row[7]
}
for row in rows
]
@@ -365,8 +405,11 @@ def handle_request_initial_data():
{
'timestamp': datetime.strptime(row[0], '%Y-%m-%dT%H:%M:%S.%f').strftime('%d.%m.%Y %H:%M:%S'),
'full_path': row[1],
'ip_address': row[2],
'user_agent': row[3]
'filesize' : row[2],
'mime_typ' : row[3],
'ip_address': row[4],
'user_agent': row[5],
'cached': row[7]
}
for row in rows
]
@@ -377,7 +420,9 @@ def handle_request_initial_data():
@app.route('/<path:path>')
@auth.require_secret
def index(path):
return render_template("app.html")
title_short = os.environ.get('TITLE_SHORT', 'Default Title')
title_long = os.environ.get('TITLE_LONG', 'Default Title')
return render_template("app.html", title_short=title_short, title_long=title_long)
if __name__ == '__main__':
socketio.run(app, debug=True, host='0.0.0.0')

auth.py

@@ -2,6 +2,7 @@ from flask import Flask, render_template, request, redirect, url_for, session
from functools import wraps
from datetime import datetime, date, timedelta
import io
import os
import json
import qrcode
import base64
@@ -72,6 +73,10 @@ def require_secret(f):
# 6) If we have folders, proceed; otherwise show index
if session['folders']:
# assume that, since the visitor has a valid secret, they are OK with anonymous tracking
# this is required to distinguish devices connecting from the same IP address
if 'device_id' not in session:
session['device_id'] = os.urandom(32).hex()
return f(*args, **kwargs)
else:
return render_template('index.html')
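os.urandom(32).hex() yields a 64-character hex string, so two devices behind the same NAT IP get distinct identifiers; for instance:

import os

device_id = os.urandom(32).hex()
print(len(device_id))  # 64 hex characters (256 bits of randomness)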


@@ -16,15 +16,13 @@ SERVER1_MOUNT_POINTS=(
"/mnt/Gottesdienste Speyer"
"/mnt/Besondere Gottesdienste"
"/mnt/Liedersammlung"
"/mnt/Jungschar"
"/mnt/Jugend"
"/mnt/app_share"
)
SERVER1_NFS_SHARES=(
"/volume1/Aufnahme-stereo/010 Gottesdienste ARCHIV"
"/volume1/Aufnahme-stereo/013 Besondere Gottesdienste"
"/volume1/Aufnahme-stereo/014 Liedersammlung"
"/volume1/app.share/Jungschar"
"/volume1/app.share/Jugend"
"/volume1/app_share"
)
# Server 2 Configuration
@@ -124,7 +122,7 @@ for server in "${SERVERS[@]}"; do
# Mount the NFS share if it's not already mounted.
if ! is_nfs_mounted "${MOUNT_POINT}"; then
echo "[INFO] NFS share is not mounted at ${MOUNT_POINT}. Attempting to mount..."
sudo mount -t nfs -o port="${LOCAL_PORT}",nolock,soft 127.0.0.1:"${NFS_SHARE}" "${MOUNT_POINT}"
sudo mount -t nfs -o ro,port="${LOCAL_PORT}",nolock,soft,timeo=5,retrans=3 127.0.0.1:"${NFS_SHARE}" "${MOUNT_POINT}"
if is_nfs_mounted "${MOUNT_POINT}"; then
echo "[SUCCESS] NFS share mounted successfully at ${MOUNT_POINT}."
else

docker-compose.yml

@@ -1,7 +1,7 @@
services:
flask-app:
image: python:3.11-slim
container_name: bethaus-app
container_name: ${CONTAINER_NAME}
restart: always
working_dir: /app
volumes:
@@ -17,31 +17,34 @@ services:
environment:
- FLASK_APP=app.py
- FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG}
networks:
- traefik
labels:
- "traefik.enable=true"
# HTTP router (port 80), redirecting to HTTPS
- "traefik.http.routers.bethaus-app.rule=Host(`app.bethaus-speyer.de`)"
- "traefik.http.routers.bethaus-app.entrypoints=web"
- "traefik.http.routers.bethaus-app.middlewares=redirect-to-https"
- "traefik.http.routers.${CONTAINER_NAME}.rule=${HOST_RULE}"
- "traefik.http.routers.${CONTAINER_NAME}.entrypoints=web"
- "traefik.http.routers.${CONTAINER_NAME}.middlewares=redirect-to-https"
- "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https"
# HTTPS router (TLS via Let's Encrypt)
- "traefik.http.routers.bethaus-app-secure.rule=Host(`app.bethaus-speyer.de`)"
- "traefik.http.routers.bethaus-app-secure.entrypoints=websecure"
- "traefik.http.routers.bethaus-app-secure.tls=true"
- "traefik.http.routers.bethaus-app-secure.tls.certresolver=myresolver"
- "traefik.http.routers.${CONTAINER_NAME}-secure.rule=${HOST_RULE}"
- "traefik.http.routers.${CONTAINER_NAME}-secure.entrypoints=websecure"
- "traefik.http.routers.${CONTAINER_NAME}-secure.tls=true"
- "traefik.http.routers.${CONTAINER_NAME}-secure.tls.certresolver=myresolver"
# Internal port
- "traefik.http.services.bethaus-app.loadbalancer.server.port=5000"
- "traefik.http.services.${CONTAINER_NAME}.loadbalancer.server.port=5000"
# Production-ready Gunicorn command with eventlet
command: >
sh -c "pip install -r requirements.txt &&
gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
networks:
traefik:
external: true


@@ -102,7 +102,7 @@ div.directory-item a, li.directory-item a, li.file-item a {
}
.directories-grid .directory-item {
background-color: #fff;
padding: 15px 10px;
padding: 15px;
border-radius: 5px;
text-align: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
@@ -114,7 +114,7 @@ div.directory-item a, li.directory-item a, li.file-item a {
grid-template-columns: 1fr auto;
align-items: center;
margin: 10px 0;
padding: 15px 10px;
padding: 15px;
background-color: #fff;
border-radius: 5px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);


@@ -1,6 +1,7 @@
// Define global variables to track music files and the current index.
let currentMusicFiles = []; // Array of objects with at least { path, index }
let currentMusicIndex = -1; // Index of the current music file
let currentTrackPath = "";
// Helper function: decode each segment then re-encode to avoid double encoding.
function encodeSubpath(subpath) {
@@ -15,6 +16,19 @@ function encodeSubpath(subpath) {
let currentGalleryImages = [];
function paintFile() {
// Highlight the currently playing file
if (currentTrackPath) {
const currentMusicFile = currentMusicFiles.find(file => file.path === currentTrackPath);
if (currentMusicFile) {
const currentMusicFileElement = document.querySelector(`.play-file[data-url="${currentMusicFile.path}"]`);
if (currentMusicFileElement) {
currentMusicFileElement.closest('.file-item').classList.add('currently-playing');
}
}
}
}
function renderContent(data) {
// Render breadcrumbs, directories (grid view when appropriate), and files.
@@ -32,8 +46,8 @@ function renderContent(data) {
let contentHTML = '';
if (data.directories.length > 0) {
contentHTML += '<ul>';
// Check if every directory name is short (≤15 characters)
const areAllShort = data.directories.every(dir => dir.name.length <= 15);
// Check if every directory name is short (≤15 characters) and no files are present
const areAllShort = data.directories.every(dir => dir.name.length <= 15) && data.files.length === 0;
if (areAllShort) {
contentHTML += '<div class="directories-grid">';
data.directories.forEach(dir => {
@@ -62,9 +76,7 @@ function renderContent(data) {
symbol = '🖼️';
}
const indexAttr = file.file_type === 'music' ? ` data-index="${currentMusicFiles.length - 1}"` : '';
// preserve currently-playing class during reloads
const isCurrentlyPlaying = file.file_type === 'music' && currentMusicIndex === currentMusicFiles.length - 1 ? ' currently-playing' : '';
contentHTML += `<li class="file-item ${isCurrentlyPlaying}">
contentHTML += `<li class="file-item">
<a href="#" class="play-file"${indexAttr} data-url="${file.path}" data-file-type="${file.file_type}">${symbol} ${file.name.replace('.mp3', '')}</a>`;
if (file.has_transcript) {
contentHTML += `<a href="#" class="show-transcript" data-url="${file.transcript_url}" title="Show Transcript">&#128196;</a>`;
@@ -132,6 +144,7 @@ function loadDirectory(subpath) {
.then(response => response.json())
.then(data => {
renderContent(data);
paintFile();
return data; // return data for further chaining
})
.catch(error => {
@@ -250,6 +263,7 @@ document.querySelectorAll('.play-file').forEach(link => {
audioPlayer.src = mediaUrl;
audioPlayer.load();
await audioPlayer.play();
currentTrackPath = relUrl;
playerButton.innerHTML = pauseIcon;
// Process file path for display.
@@ -286,6 +300,9 @@ document.querySelectorAll('.play-file').forEach(link => {
} else if (fileType === 'image') {
// Open the gallery modal for image files.
openGalleryModal(relUrl);
} else {
// anything else: fall back to serving the file as a download
window.location.href = `/media/${relUrl}`;
}
});
});
@@ -412,8 +429,6 @@ if ('mediaSession' in navigator) {
}
document.getElementById('globalAudio').addEventListener('ended', () => {
// Save the current track's path (if any)
const currentTrackPath = currentMusicFiles[currentMusicIndex] ? currentMusicFiles[currentMusicIndex].path : null;
reloadDirectory().then(() => {
@@ -436,7 +451,7 @@ document.getElementById('globalAudio').addEventListener('ended', () => {
});
});
document.addEventListener("DOMContentLoaded", function() {
// Automatically reload every 5 minutes (300,000 milliseconds)
setInterval(reloadDirectory, 300000);
});
// document.addEventListener("DOMContentLoaded", function() {
// // Automatically reload every 5 minutes (300,000 milliseconds)
// setInterval(reloadDirectory, 300000);
// });

templates/app.html

@@ -8,7 +8,7 @@
<meta property="og:image" content="https://app.bethaus-speyer.de/static/icons/logo-200x200.png" />
<meta property="og:url" content="https://app.bethaus-speyer.de" />
<title>Gottesdienste</title>
<title>{{ title_short }}</title>
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
<meta name="description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft.">
<meta name="author" content="Bethaus Speyer">
@@ -38,7 +38,7 @@
<a href="#">
<img src="/static/logoW.png" alt="Logo" class="logo">
</a>
<h1>Gottesdienste Speyer und Schwegenheim</h1>
<h1>{{ title_long }}</h1>
</header>
<div class="wrapper">
<div class="container">


@@ -41,6 +41,9 @@
<th>IP Address</th>
<th>User Agent</th>
<th>File Path</th>
<th>File Size</th>
<th>MIME-Typ</th>
<th>Cached</th>
</tr>
</thead>
<tbody id="connectionsTableBody">
@@ -71,6 +74,9 @@
<td>${record.ip_address}</td>
<td>${record.user_agent}</td>
<td>${record.full_path}</td>
<td>${record.filesize}</td>
<td>${record.mime_typ}</td>
<td>${record.cached}</td>
`;
tbody.appendChild(row);
});

templates/dashboard.html

@@ -58,7 +58,7 @@
<div class="card text-white bg-warning">
<div class="card-body">
<h5 class="card-title">eindeutige Nutzer</h5>
<p class="card-text">{{ unique_ips }}</p>
<p class="card-text">{{ unique_user }}</p>
</div>
</div>
</div>
@@ -75,12 +75,12 @@
</div>
</div>
</div>
<!-- Top Files Accessed Chart -->
<!-- Timeframe Breakdown Chart (Bar Chart) -->
<div class="col-md-6 mb-4">
<div class="card">
<div class="card-body">
<h5 class="card-title">Häufig geladene Dateien</h5>
<canvas id="topFilesChart"></canvas>
<h5 class="card-title">Downloads nach Zeit</h5>
<canvas id="timeframeChart"></canvas>
</div>
</div>
</div>
@@ -93,12 +93,12 @@
</div>
</div>
</div>
<!-- Referrer Distribution Chart -->
<!-- Folder Distribution Chart -->
<div class="col-md-6 mb-4">
<div class="card">
<div class="card-body">
<h5 class="card-title">Verteilung auf Ordner</h5>
<canvas id="referrerChart"></canvas>
<canvas id="folderChart"></canvas>
</div>
</div>
</div>
@@ -151,8 +151,8 @@
<tbody>
{% for row in rows %}
<tr>
<td>{{ row[0] }}</td>
<td>{{ row[1] }}</td>
<td>{{ row.rel_path }}</td>
<td>{{ row.access_count }}</td>
</tr>
{% else %}
<tr>
@@ -171,10 +171,40 @@
<script>
// Data passed from the backend as JSON
const dailyAccessData = {{ daily_access_data|tojson }};
const topFilesData = {{ top_files_data|tojson }};
// Note: user_agent_data now contains 'device' and 'count'
// Replace topFilesData usage with timeframeData for this chart
const timeframeData = {{ timeframe_data|tojson }};
const userAgentData = {{ user_agent_data|tojson }};
const referrerData = {{ referrer_data|tojson }};
const folderData = {{ folder_data|tojson }};
// shift the labels to local time zone
const timeframe = "{{ timeframe }}"; // e.g., 'today', '7days', '30days', or '365days'
const shiftedLabels = timeframeData.map(item => {
if (timeframe === 'today') {
// For "today", the bucket is an hour in UTC (e.g., "14")
const utcHour = parseInt(item.bucket, 10);
const now = new Date();
// Create Date objects for the start and end of the hour in UTC
const utcStart = new Date(Date.UTC(now.getFullYear(), now.getMonth(), now.getDate(), utcHour));
const utcEnd = new Date(Date.UTC(now.getFullYear(), now.getMonth(), now.getDate(), utcHour + 1));
// Convert to local time strings, e.g., "16:00"
const localStart = utcStart.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
const localEnd = utcEnd.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
return `${localStart} - ${localEnd}`;
} else if (timeframe === '7days' || timeframe === '30days') {
// For these timeframes, the bucket is a date in the format "YYYY-MM-DD"
const utcDate = new Date(item.bucket + 'T00:00:00Z');
return utcDate.toLocaleDateString(); // Adjust formatting as needed
} else if (timeframe === '365days') {
// For this timeframe, the bucket is a month in the format "YYYY-MM"
const [year, month] = item.bucket.split('-');
const dateObj = new Date(year, month - 1, 1);
// Format to something like "Mar 2025"
return dateObj.toLocaleString([], { month: 'short', year: 'numeric' });
} else {
// Fallback: use the bucket value as-is
return item.bucket;
}
});
// Access Trend Chart - Line Chart
const ctxTrend = document.getElementById('accessTrendChart').getContext('2d');
@@ -199,25 +229,24 @@
}
});
// Top Files Chart - Horizontal Bar Chart
const ctxTopFiles = document.getElementById('topFilesChart').getContext('2d');
new Chart(ctxTopFiles, {
// Timeframe Breakdown Chart - Bar Chart (buckets depend on the selected timeframe)
const ctxTimeframe = document.getElementById('timeframeChart').getContext('2d');
new Chart(ctxTimeframe, {
type: 'bar',
data: {
labels: topFilesData.map(item => item.full_path),
labels: shiftedLabels,
datasets: [{
label: 'Download Count',
data: topFilesData.map(item => item.access_count),
data: timeframeData.map(item => item.count),
borderWidth: 1
}]
},
options: {
indexAxis: 'y',
responsive: true,
plugins: { legend: { display: false } },
scales: {
x: { title: { display: true, text: 'Download Count' } },
y: { title: { display: true, text: '' } }
x: { title: { display: true, text: 'Local Time Range' } },
y: { title: { display: true, text: 'Download Count' } }
}
}
});
@@ -235,14 +264,14 @@
options: { responsive: true }
});
// Referrer Distribution - Pie Chart (with shortened referrers)
const ctxReferrer = document.getElementById('referrerChart').getContext('2d');
new Chart(ctxReferrer, {
// Folder Distribution - Pie Chart (grouped by parent folder)
const ctxfolder = document.getElementById('folderChart').getContext('2d');
new Chart(ctxfolder, {
type: 'pie',
data: {
labels: referrerData.map(item => item.referrer),
labels: folderData.map(item => item.folder),
datasets: [{
data: referrerData.map(item => item.count)
data: folderData.map(item => item.count)
}]
},
options: { responsive: true }

transforme_sql.py (new file, 42 lines)

@@ -0,0 +1,42 @@
import sqlite3
def transform_database(db_path):
# Connect to the SQLite database
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# 1. Create the new table with the desired schema.
cursor.execute('''
CREATE TABLE IF NOT EXISTS file_access_log_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
rel_path TEXT,
filesize INTEGER,
mime TEXT,
ip_address TEXT,
user_agent TEXT,
device_id TEXT,
cached BOOLEAN
)
''')
# 2. Copy data from the old table, applying the following transformations:
# - Rename full_path to rel_path and remove '/mnt/' from its entries.
# - Omit the referrer column.
# - Copy ip_address into the new device_id column.
# - Leave the new filesize, mime, and cached columns NULL for historical rows,
#   so the schema matches the one created in analytics.py.
cursor.execute('''
INSERT INTO file_access_log_new (id, timestamp, rel_path, ip_address, user_agent, device_id)
SELECT id, timestamp, REPLACE(full_path, '/mnt/', ''), ip_address, user_agent, ip_address
FROM file_access_log
''')
# 3. Drop the old table.
cursor.execute('DROP TABLE file_access_log')
# 4. Rename the new table to use the original table's name.
cursor.execute('ALTER TABLE file_access_log_new RENAME TO file_access_log')
# Commit the changes and close the connection.
conn.commit()
conn.close()
if __name__ == "__main__":
# Point this at your SQLite database file.
transform_database("access_log.db")