diff --git a/analytics.py b/analytics.py index eeec0e5..702a386 100644 --- a/analytics.py +++ b/analytics.py @@ -1,10 +1,8 @@ -from flask import render_template, request, session +from flask import render_template, request from datetime import datetime, timedelta import geoip2.database -from urllib.parse import urlparse, unquote from auth import require_secret import os -import threading import psycopg2 file_access_temp = [] @@ -26,12 +24,13 @@ class Database(metaclass=SingletonMeta): self.password = os.environ.get('DB_PASSWORD') self.host = os.environ.get('DB_HOST') self.port = int(os.environ.get('DB_PORT', 5432)) + self.connection = psycopg2.connect(dbname=self.dbname, user=self.user, password=self.password, host=self.host, port=self.port) - # Enable autocommit so we don't have to call commit() after every transaction. + # Enable autocommit self.connection.autocommit = True self.init_log_db() @@ -52,12 +51,9 @@ class Database(metaclass=SingletonMeta): cached BOOLEAN ) ''') + +log_db = Database() -try: - # Create a global database instance. - log_db = Database() -except: - print("No access to database. No logs available!!!") def lookup_location(ip, reader): try: @@ -111,9 +107,28 @@ def connections(): @require_secret def dashboard(): + filetype_arg = request.args.get('filetype', 'audio') timeframe = request.args.get('timeframe', 'today') now = datetime.now() + # Determine which file type we're filtering by. + filetype = 'other' + + allowed_list = ['mp3', 'wav', 'audio'] + if filetype_arg.lower() in allowed_list: + filetype = 'audio/' + + allowed_list = ['jpg', 'jpeg', 'image', 'photo'] + if filetype_arg.lower() in allowed_list: + filetype = 'image/' + + allowed_list = ['mp4', 'mov', 'wmv', 'avi', 'video'] + if filetype_arg.lower() in allowed_list: + filetype = 'video/' + + + + # Determine the start time based on timeframe. 
if timeframe == 'today': start = now.replace(hour=0, minute=0, second=0, microsecond=0) elif timeframe == '7days': @@ -125,75 +140,88 @@ def dashboard(): else: start = now.replace(hour=0, minute=0, second=0, microsecond=0) + # Build the SQL filter for mime + if filetype == 'other': + # Exclude audio, image, and video mimes ('%%' is a literal LIKE wildcard; psycopg2 parses a bare '%' as a placeholder when params are passed) + filetype_filter_sql = "AND mime NOT LIKE 'audio/%%' AND mime NOT LIKE 'image/%%' AND mime NOT LIKE 'video/%%'" + params = (start,) + else: + # Filter for mimes that start with the given type. + filetype_filter_sql = "AND mime LIKE %s" + params = (start, filetype + '%') + with log_db.connection.cursor() as cursor: - # Raw file access counts for the table (top files) - cursor.execute(''' + # Raw file access counts (top files) + query = f''' SELECT rel_path, COUNT(*) as access_count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY rel_path ORDER BY access_count DESC LIMIT 20 - ''', (start,)) + ''' + cursor.execute(query, params) rows = cursor.fetchall() # Daily access trend for a line chart - cursor.execute(''' + query = f''' SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY CAST(timestamp AS DATE) ORDER BY date - ''', (start,)) + ''' + cursor.execute(query, params) daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()] # Aggregate download counts by time bucket according to the timeframe. 
if timeframe == 'today': - # Group by hour using to_char - cursor.execute(''' + query = f''' SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY bucket ORDER BY bucket - ''', (start,)) + ''' + cursor.execute(query, params) elif timeframe in ('7days', '30days'): - # Group by day - cursor.execute(''' + query = f''' SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY bucket ORDER BY bucket - ''', (start,)) + ''' + cursor.execute(query, params) elif timeframe == '365days': - # Group by month using to_char - cursor.execute(''' + query = f''' SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY bucket ORDER BY bucket - ''', (start,)) + ''' + cursor.execute(query, params) else: - # Fallback: group by day - cursor.execute(''' + query = f''' SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY bucket ORDER BY bucket - ''', (start,)) + ''' + cursor.execute(query, params) timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()] # User agent distribution (aggregate by device type) - cursor.execute(''' + query = f''' SELECT user_agent, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY user_agent ORDER BY count DESC - ''', (start,)) + ''' + cursor.execute(query, params) raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()] device_counts = {} for entry in raw_user_agents: @@ -202,13 +230,14 @@ def dashboard(): user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()] # Parent folder 
distribution - cursor.execute(''' + query = f''' SELECT rel_path, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY rel_path ORDER BY count DESC - ''', (start,)) + ''' + cursor.execute(query, params) folder_data = {} for row in cursor.fetchall(): rel_path = row[0] @@ -219,23 +248,27 @@ def dashboard(): folder_data = folder_data[:10] # Aggregate IP addresses with counts - cursor.execute(''' + query = f''' SELECT ip_address, COUNT(*) as count FROM file_access_log - WHERE timestamp >= %s + WHERE timestamp >= %s {filetype_filter_sql} GROUP BY ip_address ORDER BY count DESC - ''', (start,)) + ''' + cursor.execute(query, params) ip_rows = cursor.fetchall() # Summary stats using separate SQL queries - cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,)) + query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}' + cursor.execute(query, params) total_accesses = cursor.fetchone()[0] - cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,)) + query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}' + cursor.execute(query, params) unique_files = cursor.fetchone()[0] - cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,)) + query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}' + cursor.execute(query, params) unique_user = cursor.fetchone()[0] # Process location data with GeoIP2. 
@@ -263,3 +296,4 @@ def dashboard(): unique_user=unique_user, timeframe_data=timeframe_data) + diff --git a/docker-compose.yml b/docker-compose.yml index 4becdc4..6e6755e 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,13 +19,13 @@ services: - FLASK_ENV=production - TITLE_SHORT=${TITLE_SHORT} - TITLE_LONG=${TITLE_LONG} - - DB_HOST=${CONTAINER_NAME}.sql + - DB_HOST=postgres-db - DB_PORT=5432 - - DB_USER=${POSTGRES_USER} - - DB_PASSWORD=${POSTGRES_PASSWORD} - - DB_NAME=${POSTGRES_DB} + - DB_USER=${DB_USER} + - DB_PASSWORD=${DB_PASSWORD} + - DB_NAME=${DB_NAME} depends_on: - - postgres + - "postgres" networks: - traefik - internal @@ -54,16 +54,18 @@ services: postgres: image: postgres:15 - container_name: "${CONTAINER_NAME}.sql" + container_name: "${CONTAINER_NAME}-db" restart: always environment: - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_USER: ${DB_USER:?} + POSTGRES_PASSWORD: ${DB_PASSWORD:?} + POSTGRES_DB: ${DB_NAME:?} volumes: - ./postgres_data:/var/lib/postgresql/data networks: - - internal + internal: + aliases: + - postgres-db networks: traefik: