From f22b2df7bbc78257187804bd0265c0f53e4258ba Mon Sep 17 00:00:00 2001 From: lelo Date: Sat, 22 Mar 2025 17:03:26 +0100 Subject: [PATCH] reorganize files --- analytics.py | 228 +++++++++++++++++++++++++++ app.py | 330 ++++----------------------------------- auth.py | 76 +++++++++ templates/dashboard.html | 1 - templates/network.html | 39 +++-- 5 files changed, 356 insertions(+), 318 deletions(-) create mode 100644 analytics.py create mode 100644 auth.py diff --git a/analytics.py b/analytics.py new file mode 100644 index 0000000..d5af7e1 --- /dev/null +++ b/analytics.py @@ -0,0 +1,228 @@ +from flask import render_template, request, session +import sqlite3 +from datetime import datetime, date, timedelta +import geoip2.database +from urllib.parse import urlparse, unquote + +from auth import require_secret + +file_access_temp = [] + +def lookup_location(ip, reader): + try: + response = reader.city(ip) + country = response.country.name if response.country.name else "Unknown" + city = response.city.name if response.city.name else "Unknown" + return country, city + except Exception: + return "Unknown", "Unknown" + +def get_device_type(user_agent): + "classify device type based on user agent string" + if 'Android' in user_agent: + return 'Android' + elif 'iPhone' in user_agent or 'iPad' in user_agent: + return 'iOS' + elif 'Windows' in user_agent: + return 'Windows' + elif 'Macintosh' in user_agent or 'Mac OS' in user_agent: + return 'MacOS' + elif 'Linux' in user_agent: + return 'Linux' + else: + return 'Other' + +def shorten_referrer(url): + segments = [seg for seg in url.split('/') if seg] + segment = segments[-1] + # Decode all percent-encoded characters (like %20, %2F, etc.) + segment_decoded = unquote(segment) + return segment_decoded + +def log_file_access(full_path): + """ + Log file access details to a SQLite database. + Records the timestamp, full file path, client IP, user agent, and referrer. 
+ """ + global file_access_temp + # Connect to the database (this will create the file if it doesn't exist) + conn = sqlite3.connect('access_log.db') + cursor = conn.cursor() + # Create the table if it doesn't exist + cursor.execute(''' + CREATE TABLE IF NOT EXISTS file_access_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT, + full_path TEXT, + ip_address TEXT, + user_agent TEXT, + referrer TEXT + ) + ''') + # Gather information from the request + timestamp = datetime.now().isoformat() + ip_address = request.remote_addr + user_agent = request.headers.get('User-Agent') + referrer = request.headers.get('Referer') + + # Insert the access record into the database + cursor.execute(''' + INSERT INTO file_access_log (timestamp, full_path, ip_address, user_agent, referrer) + VALUES (?, ?, ?, ?, ?) + ''', (timestamp, full_path, ip_address, user_agent, referrer)) + conn.commit() + conn.close() + file_access_temp.insert(0, [timestamp, full_path, ip_address, user_agent, referrer]) + return return_file_access() + +def return_file_access(): + global file_access_temp + if len(file_access_temp) > 0: + # Compute the cutoff time (10 minutes ago from now) + cutoff_time = datetime.now() - timedelta(minutes=10) + # Update the list in-place to keep only entries newer than 10 minutes + file_access_temp[:] = [ + entry for entry in file_access_temp + if datetime.fromisoformat(entry[0]) >= cutoff_time + ] + return file_access_temp + else: + return [] + +def network(): + return render_template('network.html') + +@require_secret +def dashboard(): + timeframe = request.args.get('timeframe', 'today') + now = datetime.now() + + if timeframe == 'today': + start = now.replace(hour=0, minute=0, second=0, microsecond=0) + elif timeframe == '7days': + start = now - timedelta(days=7) + elif timeframe == '30days': + start = now - timedelta(days=30) + elif timeframe == '365days': + start = now - timedelta(days=365) + else: + start = now.replace(hour=0, minute=0, second=0, microsecond=0) + 
+ conn = sqlite3.connect('access_log.db') + cursor = conn.cursor() + + # Raw file access counts for the table (top files) + cursor.execute(''' + SELECT full_path, COUNT(*) as access_count + FROM file_access_log + WHERE timestamp >= ? + GROUP BY full_path + ORDER BY access_count DESC + LIMIT 20 + ''', (start.isoformat(),)) + rows = cursor.fetchall() + + # Daily access trend for a line chart + cursor.execute(''' + SELECT date(timestamp) as date, COUNT(*) as count + FROM file_access_log + WHERE timestamp >= ? + GROUP BY date + ORDER BY date + ''', (start.isoformat(),)) + daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()] + + # Top files for bar chart + cursor.execute(''' + SELECT full_path, COUNT(*) as access_count + FROM file_access_log + WHERE timestamp >= ? + GROUP BY full_path + ORDER BY access_count DESC + LIMIT 10 + ''', (start.isoformat(),)) + top_files_data = [dict(full_path=row[0], access_count=row[1]) for row in cursor.fetchall()] + + # User agent distribution (aggregate by device type) + cursor.execute(''' + SELECT user_agent, COUNT(*) as count + FROM file_access_log + WHERE timestamp >= ? + GROUP BY user_agent + ORDER BY count DESC + ''', (start.isoformat(),)) + raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()] + device_counts = {} + for entry in raw_user_agents: + device = get_device_type(entry['user_agent']) + device_counts[device] = device_counts.get(device, 0) + entry['count'] + # Rename to user_agent_data for compatibility with the frontend + user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()] + + # Referrer distribution (shorten links) + cursor.execute(''' + SELECT referrer, COUNT(*) as count + FROM file_access_log + WHERE timestamp >= ? 
+ GROUP BY referrer + ORDER BY count DESC + LIMIT 10 + ''', (start.isoformat(),)) + referrer_data = [] + for row in cursor.fetchall(): + raw_ref = row[0] + shortened = shorten_referrer(raw_ref) if raw_ref else "Direct/None" + referrer_data.append(dict(referrer=shortened, count=row[1])) + + # Aggregate IP addresses with counts + cursor.execute(''' + SELECT ip_address, COUNT(*) as count + FROM file_access_log + WHERE timestamp >= ? + GROUP BY ip_address + ORDER BY count DESC + LIMIT 20 + ''', (start.isoformat(),)) + ip_rows = cursor.fetchall() + + # Initialize GeoIP2 reader once for efficiency + reader = geoip2.database.Reader('GeoLite2-City.mmdb') + ip_data = [] + for ip, count in ip_rows: + country, city = lookup_location(ip, reader) + ip_data.append(dict(ip=ip, count=count, country=country, city=city)) + reader.close() + + # Aggregate by city (ignoring entries without a city) + city_counts = {} + for entry in ip_data: + if entry['city']: + city_counts[entry['city']] = city_counts.get(entry['city'], 0) + entry['count'] + city_data = [dict(city=city, count=count) for city, count in city_counts.items()] + + # Summary stats using separate SQL queries + cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),)) + total_accesses = cursor.fetchone()[0] + + # Use a separate query to count unique files (distinct full_path values) + cursor.execute('SELECT COUNT(DISTINCT full_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),)) + unique_files = cursor.fetchone()[0] + + # Use a separate query to count unique IP addresses + cursor.execute('SELECT COUNT(DISTINCT ip_address) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),)) + unique_ips = cursor.fetchone()[0] + + conn.close() + + return render_template("dashboard.html", + timeframe=timeframe, + rows=rows, + daily_access_data=daily_access_data, + top_files_data=top_files_data, + user_agent_data=user_agent_data, + referrer_data=referrer_data, + 
ip_data=ip_data, + city_data=city_data, + total_accesses=total_accesses, + unique_files=unique_files, + unique_ips=unique_ips) \ No newline at end of file diff --git a/app.py b/app.py index 3a8af09..2503754 100755 --- a/app.py +++ b/app.py @@ -15,6 +15,11 @@ import geoip2.database from functools import lru_cache from urllib.parse import urlparse, unquote from werkzeug.middleware.proxy_fix import ProxyFix + + +from auth import require_secret +import analytics as a + cache = diskcache.Cache('./filecache', size_limit= 48 * 1024**3) # 48 GB limit app = Flask(__name__) @@ -26,8 +31,8 @@ if os.environ.get('FLASK_ENV') == 'production': app.config['SESSION_COOKIE_SAMESITE'] = 'None' app.config['SESSION_COOKIE_SECURE'] = True -with open('folder_config.json') as file: - app.config['folder_config'] = json.load(file) +app.add_url_rule('/dashboard', view_func=a.dashboard) +app.add_url_rule('/network', view_func=a.network) socketio = SocketIO(app) @@ -36,74 +41,6 @@ clients_connected = 0 background_thread = None thread_lock = threading.Lock() -def require_secret(f): - @wraps(f) - def decorated_function(*args, **kwargs): - # Your config list: - folder_config = app.config['folder_config'] - - def is_valid(config_item, provided_secret): - """ - Checks if today's date is <= validity date - AND if the provided secret matches config_item['secret']. 
- """ - folder_validity = config_item['validity'] - # Convert string to a date if necessary: - if isinstance(folder_validity, str): - folder_validity = datetime.strptime(folder_validity, '%d.%m.%Y').date() - - # Return whether it's still valid and secrets match: - return ( - date.today() <= folder_validity and - provided_secret == config_item['secret'] - ) - - # 1) Get secret from query params (if any) - args_secret = request.args.get('secret') - - # 2) Initialize 'allowed_secrets' in the session if missing - if 'allowed_secrets' not in session: - session['allowed_secrets'] = [] - - # 3) If a new secret is provided, check if it’s valid, and add to session if so - if args_secret: - for config_item in folder_config: - if is_valid(config_item, args_secret): - if args_secret not in session['allowed_secrets']: - session['allowed_secrets'].append(args_secret) - session.permanent = True # Make the session permanent - - # 4) Re-check validity of each secret in session['allowed_secrets'] - # If a secret is no longer valid (or not in config), remove it. 
- for secret_in_session in session['allowed_secrets'][:]: - # Find the current config item with matching secret - config_item = next( - (c for c in folder_config if c['secret'] == secret_in_session), - None - ) - # If the config item doesn’t exist or is invalid, remove secret - if config_item is None or not is_valid(config_item, secret_in_session): - session['allowed_secrets'].remove(secret_in_session) - - # 5) Build session['folders'] fresh from the valid secrets - session['folders'] = {} - for secret_in_session in session.get('allowed_secrets', []): - config_item = next( - (c for c in folder_config if c['secret'] == secret_in_session), - None - ) - if config_item: - for folder_info in config_item['folders']: - session['folders'][folder_info['foldername']] = folder_info['folderpath'] - - # 6) If we have folders, proceed; otherwise show index - if session['folders']: - return f(*args, **kwargs) - else: - return render_template('index.html') - - return decorated_function - @lru_cache(maxsize=10) def get_cached_image(size): dimensions = tuple(map(int, size.split('-')[1].split('x'))) @@ -199,70 +136,6 @@ def generate_breadcrumbs(subpath=None): breadcrumbs.append({'name': part, 'path': path_accum}) return breadcrumbs -def lookup_location(ip, reader): - try: - response = reader.city(ip) - country = response.country.name if response.country.name else "Unknown" - city = response.city.name if response.city.name else "Unknown" - return country, city - except Exception: - return "Unknown", "Unknown" - -def get_device_type(user_agent): - "classify device type based on user agent string" - if 'Android' in user_agent: - return 'Android' - elif 'iPhone' in user_agent or 'iPad' in user_agent: - return 'iOS' - elif 'Windows' in user_agent: - return 'Windows' - elif 'Macintosh' in user_agent or 'Mac OS' in user_agent: - return 'MacOS' - elif 'Linux' in user_agent: - return 'Linux' - else: - return 'Other' - -def shorten_referrer(url): - segments = [seg for seg in url.split('/') if 
seg] - segment = segments[-1] - # Decode all percent-encoded characters (like %20, %2F, etc.) - segment_decoded = unquote(segment) - return segment_decoded - -def log_file_access(full_path): - """ - Log file access details to a SQLite database. - Records the timestamp, full file path, client IP, user agent, and referrer. - """ - # Connect to the database (this will create the file if it doesn't exist) - conn = sqlite3.connect('access_log.db') - cursor = conn.cursor() - # Create the table if it doesn't exist - cursor.execute(''' - CREATE TABLE IF NOT EXISTS file_access_log ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - timestamp TEXT, - full_path TEXT, - ip_address TEXT, - user_agent TEXT, - referrer TEXT - ) - ''') - # Gather information from the request - timestamp = datetime.now().isoformat() - ip_address = request.remote_addr - user_agent = request.headers.get('User-Agent') - referrer = request.headers.get('Referer') - - # Insert the access record into the database - cursor.execute(''' - INSERT INTO file_access_log (timestamp, full_path, ip_address, user_agent, referrer) - VALUES (?, ?, ?, ?, ?) 
- ''', (timestamp, full_path, ip_address, user_agent, referrer)) - conn.commit() - conn.close() - @app.route('/static/icons/.png') def serve_resized_icon(size): cached_image_bytes = get_cached_image(size) @@ -307,136 +180,6 @@ def api_browse(subpath): 'files': files }) -@app.route("/dashboard") -@require_secret -def dashboard(): - timeframe = request.args.get('timeframe', 'today') - now = datetime.now() - - if timeframe == 'today': - start = now.replace(hour=0, minute=0, second=0, microsecond=0) - elif timeframe == '7days': - start = now - timedelta(days=7) - elif timeframe == '30days': - start = now - timedelta(days=30) - elif timeframe == '365days': - start = now - timedelta(days=365) - else: - start = now.replace(hour=0, minute=0, second=0, microsecond=0) - - conn = sqlite3.connect('access_log.db') - cursor = conn.cursor() - - # Raw file access counts for the table (top files) - cursor.execute(''' - SELECT full_path, COUNT(*) as access_count - FROM file_access_log - WHERE timestamp >= ? - GROUP BY full_path - ORDER BY access_count DESC - LIMIT 20 - ''', (start.isoformat(),)) - rows = cursor.fetchall() - - # Daily access trend for a line chart - cursor.execute(''' - SELECT date(timestamp) as date, COUNT(*) as count - FROM file_access_log - WHERE timestamp >= ? - GROUP BY date - ORDER BY date - ''', (start.isoformat(),)) - daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()] - - # Top files for bar chart (limit to 10) - cursor.execute(''' - SELECT full_path, COUNT(*) as access_count - FROM file_access_log - WHERE timestamp >= ? - GROUP BY full_path - ORDER BY access_count DESC - LIMIT 10 - ''', (start.isoformat(),)) - top_files_data = [dict(full_path=row[0], access_count=row[1]) for row in cursor.fetchall()] - - # User agent distribution (aggregate by device type) - cursor.execute(''' - SELECT user_agent, COUNT(*) as count - FROM file_access_log - WHERE timestamp >= ? 
- GROUP BY user_agent - ORDER BY count DESC - ''', (start.isoformat(),)) - raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()] - device_counts = {} - for entry in raw_user_agents: - device = get_device_type(entry['user_agent']) - device_counts[device] = device_counts.get(device, 0) + entry['count'] - # Rename to user_agent_data for compatibility with the frontend - user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()] - - # Referrer distribution (shorten links) - cursor.execute(''' - SELECT referrer, COUNT(*) as count - FROM file_access_log - WHERE timestamp >= ? - GROUP BY referrer - ORDER BY count DESC - LIMIT 10 - ''', (start.isoformat(),)) - referrer_data = [] - for row in cursor.fetchall(): - raw_ref = row[0] - shortened = shorten_referrer(raw_ref) if raw_ref else "Direct/None" - referrer_data.append(dict(referrer=shortened, count=row[1])) - - # Aggregate IP addresses with counts - cursor.execute(''' - SELECT ip_address, COUNT(*) as count - FROM file_access_log - WHERE timestamp >= ? 
- GROUP BY ip_address - ORDER BY count DESC - LIMIT 20 - ''', (start.isoformat(),)) - ip_rows = cursor.fetchall() - - # Initialize GeoIP2 reader once for efficiency - reader = geoip2.database.Reader('GeoLite2-City.mmdb') - ip_data = [] - for ip, count in ip_rows: - country, city = lookup_location(ip, reader) - ip_data.append(dict(ip=ip, count=count, country=country, city=city)) - reader.close() - - # Aggregate by city (ignoring entries without a city) - city_counts = {} - for entry in ip_data: - if entry['city']: - city_counts[entry['city']] = city_counts.get(entry['city'], 0) + entry['count'] - city_data = [dict(city=city, count=count) for city, count in city_counts.items()] - - # Summary stats - total_accesses = sum([row[1] for row in rows]) - unique_files = len(rows) - cursor.execute('SELECT COUNT(DISTINCT ip_address) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),)) - unique_ips = cursor.fetchone()[0] - conn.close() - - return render_template("dashboard.html", - timeframe=timeframe, - rows=rows, - daily_access_data=daily_access_data, - top_files_data=top_files_data, - user_agent_data=user_agent_data, - referrer_data=referrer_data, - ip_data=ip_data, - city_data=city_data, - total_accesses=total_accesses, - unique_files=unique_files, - unique_ips=unique_ips) - - @app.route("/media/") @require_secret def serve_file(subpath): @@ -459,7 +202,7 @@ def serve_file(subpath): # only log initial hits and not the reload of further file parts range_header = request.headers.get('Range') if request.method != 'HEAD' and (not range_header or range_header.startswith("bytes=0-")): - log_file_access(full_path) + a.log_file_access(full_path) # Check cache first (using diskcache) response = None @@ -569,44 +312,30 @@ def crawl_and_cache(subpath): return json.dumps({"cached_files": cached_files}, indent=4), 200 def query_recent_connections(): - """ - Every 5 seconds, query the database for connections in the last 60 seconds, - sorted by timestamp (most recent first), 
and emit the data to clients. - This loop will exit when there are no connected clients. - """ global clients_connected + last_connections = None # Initialize with None to ensure the first emit happens while clients_connected > 0: - cutoff = datetime.now() - timedelta(seconds=60) - cutoff_iso = cutoff.isoformat() + rows = a.return_file_access() - # Query the SQLite database for recent connections - conn = sqlite3.connect('access_log.db') - cursor = conn.cursor() - cursor.execute(''' - SELECT * FROM file_access_log - WHERE timestamp >= ? - ORDER BY timestamp DESC - ''', (cutoff_iso,)) - rows = cursor.fetchall() - conn.close() + # Convert rows to dictionaries for the client. + connections = [ + { + 'timestamp': row[0], + 'full_path': row[1], + 'ip_address': row[2], + 'user_agent': row[3], + 'referrer': row[4] + } + for row in rows + ] - # Convert rows to dictionaries for the client, including all columns. - connections = [] - for row in rows: - # Row order: (id, timestamp, full_path, ip_address, user_agent, referrer) - connections.append({ - 'id': row[0], - 'timestamp': row[1], - 'full_path': row[2], - 'ip_address': row[3], - 'user_agent': row[4], - 'referrer': row[5] - }) - - # Emit the result over Socket.IO (to the default namespace) - socketio.emit('recent_connections', connections) - time.sleep(5) - # When no clients are connected, exit the thread. + # Only emit if there's a change compared to the previous connections. + if connections != last_connections: + socketio.emit('recent_connections', connections) + last_connections = connections.copy() # Store a copy of the current state + + time.sleep(1) + print("No clients connected; stopping query thread.") @socketio.on('connect') @@ -626,9 +355,6 @@ def handle_disconnect(): clients_connected -= 1 print("Client disconnected. Total clients:", clients_connected) -@app.route('/network') -def network(): - return render_template('network.html') # Catch-all route to serve the single-page application template. 
@app.route('/', defaults={'path': ''}) diff --git a/auth.py b/auth.py new file mode 100644 index 0000000..5157002 --- /dev/null +++ b/auth.py @@ -0,0 +1,76 @@ +from flask import Flask, render_template, request, session +from functools import wraps +from datetime import datetime, date, timedelta +import json + +folder_config = {} + +def require_secret(f): + @wraps(f) + def decorated_function(*args, **kwargs): + global folder_config + if not folder_config: + with open('folder_config.json') as file: + folder_config = json.load(file) + + def is_valid(config_item, provided_secret): + """ + Checks if today's date is <= validity date + AND if the provided secret matches config_item['secret']. + """ + folder_validity = config_item['validity'] + # Convert string to a date if necessary: + if isinstance(folder_validity, str): + folder_validity = datetime.strptime(folder_validity, '%d.%m.%Y').date() + + # Return whether it's still valid and secrets match: + return ( + date.today() <= folder_validity and + provided_secret == config_item['secret'] + ) + + # 1) Get secret from query params (if any) + args_secret = request.args.get('secret') + + # 2) Initialize 'allowed_secrets' in the session if missing + if 'allowed_secrets' not in session: + session['allowed_secrets'] = [] + + # 3) If a new secret is provided, check if it’s valid, and add to session if so + if args_secret: + for config_item in folder_config: + if is_valid(config_item, args_secret): + if args_secret not in session['allowed_secrets']: + session['allowed_secrets'].append(args_secret) + session.permanent = True # Make the session permanent + + # 4) Re-check validity of each secret in session['allowed_secrets'] + # If a secret is no longer valid (or not in config), remove it. 
+ for secret_in_session in session['allowed_secrets'][:]: + # Find the current config item with matching secret + config_item = next( + (c for c in folder_config if c['secret'] == secret_in_session), + None + ) + # If the config item doesn’t exist or is invalid, remove secret + if config_item is None or not is_valid(config_item, secret_in_session): + session['allowed_secrets'].remove(secret_in_session) + + # 5) Build session['folders'] fresh from the valid secrets + session['folders'] = {} + for secret_in_session in session.get('allowed_secrets', []): + config_item = next( + (c for c in folder_config if c['secret'] == secret_in_session), + None + ) + if config_item: + for folder_info in config_item['folders']: + session['folders'][folder_info['foldername']] = folder_info['folderpath'] + + # 6) If we have folders, proceed; otherwise show index + if session['folders']: + return f(*args, **kwargs) + else: + return render_template('index.html') + + return decorated_function \ No newline at end of file diff --git a/templates/dashboard.html b/templates/dashboard.html index df68640..dd7e506 100644 --- a/templates/dashboard.html +++ b/templates/dashboard.html @@ -8,7 +8,6 @@ diff --git a/templates/network.html b/templates/network.html index 3c14589..3defed6 100644 --- a/templates/network.html +++ b/templates/network.html @@ -6,22 +6,37 @@ Recent Connections -
-

kürzlich verbunden... (in der letzten Minute)

-
- Home - Dashboard +
+
+

Downloads in den letzten 10 Minuten

+
+ Home + Dashboard +
-
+
- @@ -48,9 +63,6 @@ data.forEach(record => { const row = document.createElement('tr'); - // Create cells for each column - const idCell = document.createElement('td'); - idCell.textContent = record.id; const timestampCell = document.createElement('td'); timestampCell.textContent = record.timestamp; const fullPathCell = document.createElement('td'); @@ -62,15 +74,12 @@ const referrerCell = document.createElement('td'); referrerCell.textContent = record.referrer; - // Append cells to the row - row.appendChild(idCell); row.appendChild(timestampCell); row.appendChild(fullPathCell); row.appendChild(ipCell); row.appendChild(userAgentCell); row.appendChild(referrerCell); - // Append the row to the table body tbody.appendChild(row); }); });
ID Timestamp Full Path IP Address