import os
import sqlite3
from datetime import datetime, timedelta

import geoip2.database
from flask import render_template, request

from auth import require_secret

# In-memory buffer of recent accesses, newest first (see return_file_access).
file_access_temp = []

# Example database name; change it to whatever you want:
DB_NAME = 'access_log.db'

# Create a single global connection to SQLite. check_same_thread=False lets
# Flask's request threads share it.
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)


def init_log_db():
    """Create the file_access_log table if it doesn't already exist."""
    with log_db:
        log_db.execute('''
            CREATE TABLE IF NOT EXISTS file_access_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT,
                rel_path TEXT,
                filesize INTEGER,
                mime TEXT,
                ip_address TEXT,
                user_agent TEXT,
                device_id TEXT,
                cached BOOLEAN
            )
        ''')


init_log_db()


def lookup_location(ip, reader):
    """Resolve an IP address to a (country, city) pair via the GeoIP2 reader."""
    try:
        response = reader.city(ip)
        country = response.country.name if response.country.name else "Unknown"
        city = response.city.name if response.city.name else "Unknown"
        return country, city
    except Exception:
        return "Unknown", "Unknown"


def get_device_type(user_agent):
    """Classify device type based on the user agent string."""
    if 'Android' in user_agent:
        return 'Android'
    elif 'iPhone' in user_agent or 'iPad' in user_agent:
        return 'iOS'
    elif 'Windows' in user_agent:
        return 'Windows'
    elif 'Macintosh' in user_agent or 'Mac OS' in user_agent:
        return 'MacOS'
    elif 'Linux' in user_agent:
        return 'Linux'
    else:
        return 'Other'


def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
    """Insert a file access record into the database."""
    global file_access_temp
    timestamp = datetime.now()  # a datetime object
    # Store the ISO timestamp in the database for easy lexical comparison
    iso_ts = timestamp.isoformat()
    with log_db:
        log_db.execute('''
            INSERT INTO file_access_log
                (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
    file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, ip_address,
                                user_agent, device_id, cached])
    return iso_ts


def return_file_access():
    """Return recent file access logs from memory (the last 10 minutes)."""
    global file_access_temp
    if file_access_temp:
        cutoff_time = datetime.now() - timedelta(minutes=10)
        # Convert each stored timestamp (ISO string) back to datetime
        file_access_temp[:] = [
            entry for entry in file_access_temp
            if datetime.fromisoformat(entry[0]) >= cutoff_time
        ]
        return file_access_temp
    else:
        return []
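
# Illustrative only: a download route elsewhere in the app might record each
# hit through log_file_access() roughly like this. The `app` object, the
# `resolve_path` helper, and the `send_file`/`mimetypes` imports are
# assumptions, not part of this module:
#
#   @app.route('/files/<path:rel_path>')
#   def serve_file(rel_path):
#       abs_path = resolve_path(rel_path)  # hypothetical path lookup
#       log_file_access(
#           rel_path=rel_path,
#           filesize=os.path.getsize(abs_path),
#           mime=mimetypes.guess_type(abs_path)[0] or 'application/octet-stream',
#           ip_address=request.remote_addr,
#           user_agent=request.headers.get('User-Agent', ''),
#           device_id=request.cookies.get('device_id', 'unknown'),
#           cached=False,
#       )
#       return send_file(abs_path)
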
@require_secret
def connections():
    return render_template('connections.html')


@require_secret
def dashboard():
    filetype_arg = request.args.get('filetype', 'audio')
    timeframe = request.args.get('timeframe', 'today')
    now = datetime.now()

    # Determine which file type we're filtering by.
    filetype = 'other'
    # Simple keyword sets that decide how we match the MIME type
    audio_list = ['mp3', 'wav', 'audio']
    image_list = ['jpg', 'jpeg', 'image', 'photo']
    video_list = ['mp4', 'mov', 'wmv', 'avi']
    if filetype_arg.lower() in audio_list:
        filetype = 'audio/'
    elif filetype_arg.lower() in image_list:
        filetype = 'image/'
    elif filetype_arg.lower() in video_list:
        filetype = 'video/'

    # Determine start time based on timeframe
    if timeframe == 'today':
        start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
    elif timeframe == '7days':
        start_dt = now - timedelta(days=7)
    elif timeframe == '30days':
        start_dt = now - timedelta(days=30)
    elif timeframe == '365days':
        start_dt = now - timedelta(days=365)
    else:
        start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)

    # We compare against the textual timestamp (ISO 8601 sorts lexically).
    start_str = start_dt.isoformat()

    # Build the SQL filter
    if filetype == 'other':
        # Exclude audio, image, video
        filetype_filter_sql = (
            "AND mime NOT LIKE 'audio/%' "
            "AND mime NOT LIKE 'image/%' "
            "AND mime NOT LIKE 'video/%' "
        )
        params_for_filter = (start_str,)
    else:
        # Filter for MIME types that start with the given prefix
        filetype_filter_sql = "AND mime LIKE ?"
        params_for_filter = (start_str, filetype + '%')

    # 1. Top files by access count
    query = f'''
        SELECT rel_path, COUNT(*) AS access_count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY access_count DESC
        LIMIT 20
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        rows = cursor.fetchall()

    # 2. Distinct device trend.
    # Group by hour for "today", by day for "7days"/"30days", by month for "365days".
    if timeframe == 'today':
        # Group by hour: substr(timestamp, 12, 2) -> HH
        query = f'''
            SELECT substr(timestamp, 12, 2) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif timeframe in ('7days', '30days'):
        # Group by day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif timeframe == '365days':
        # Group by month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket,
                   COUNT(DISTINCT device_id) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        distinct_device_data_rows = cursor.fetchall()
    distinct_device_data = [
        dict(bucket=r[0], count=r[1]) for r in distinct_device_data_rows
    ]
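
    # The hour/day/month bucketing above is repeated for the access-count
    # trend below. A small helper (illustrative sketch, not part of the
    # original module) could build the bucket expression in one place:
    #
    #   def bucket_expr(timeframe):
    #       if timeframe == 'today':
    #           return "substr(timestamp, 12, 2)"   # HH
    #       if timeframe == '365days':
    #           return "substr(timestamp, 1, 7)"    # YYYY-MM
    #       return "substr(timestamp, 1, 10)"       # YYYY-MM-DD
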
    # 3. Timeframe-based aggregation.
    # Group by hour for "today", by day for "7days"/"30days", by month for "365days".
    if timeframe == 'today':
        # Hour: substr(timestamp, 12, 2) -> HH
        query = f'''
            SELECT substr(timestamp, 12, 2) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif timeframe in ('7days', '30days'):
        # Day: substr(timestamp, 1, 10) -> YYYY-MM-DD
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    elif timeframe == '365days':
        # Month: substr(timestamp, 1, 7) -> YYYY-MM
        query = f'''
            SELECT substr(timestamp, 1, 7) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    else:
        # Default: group by day
        query = f'''
            SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
            FROM file_access_log
            WHERE timestamp >= ?
            {filetype_filter_sql}
            GROUP BY bucket
            ORDER BY bucket
        '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        timeframe_data_rows = cursor.fetchall()
    timeframe_data = [
        dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
    ]

    # 4. User agent distribution
    query = f'''
        SELECT user_agent, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY user_agent
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        raw_user_agents = cursor.fetchall()
    device_counts = {}
    for (ua, cnt) in raw_user_agents:
        device = get_device_type(ua)
        device_counts[device] = device_counts.get(device, 0) + cnt
    user_agent_data = [
        dict(device=d, count=c) for d, c in device_counts.items()
    ]

    # 5. Parent folder distribution
    query = f'''
        SELECT rel_path, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY rel_path
        ORDER BY count DESC
    '''
    folder_data_dict = {}
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        for (rp, c) in cursor.fetchall():
            if '/' in rp:
                parent_folder = rp.rsplit('/', 1)[0]
            else:
                parent_folder = "Root"
            folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c
    folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
    folder_data.sort(key=lambda x: x['count'], reverse=True)
    folder_data = folder_data[:10]

    # 6. Aggregate IP addresses with counts
    query = f'''
        SELECT ip_address, COUNT(*) AS count
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
        GROUP BY ip_address
        ORDER BY count DESC
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        ip_rows = cursor.fetchall()

    # 7. Summary stats
    # total_accesses
    query = f'''
        SELECT COUNT(*)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        total_accesses = cursor.fetchone()[0]

    # unique_files
    query = f'''
        SELECT COUNT(DISTINCT rel_path)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_files = cursor.fetchone()[0]

    # unique_user
    query = f'''
        SELECT COUNT(DISTINCT device_id)
        FROM file_access_log
        WHERE timestamp >= ?
        {filetype_filter_sql}
    '''
    with log_db:
        cursor = log_db.execute(query, params_for_filter)
        unique_user = cursor.fetchone()[0]
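
    # Note: the GeoIP2 reader below is opened and closed on every dashboard
    # request. If that becomes a bottleneck, one option (untested sketch) is
    # to open a single module-level reader at import time and reuse it:
    #
    #   geo_reader = geoip2.database.Reader('GeoLite2-City.mmdb')  # module level
    #   ...
    #   country, city = lookup_location(ip_addr, geo_reader)
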
    # 8. Process location data with GeoIP2
    reader = geoip2.database.Reader('GeoLite2-City.mmdb')
    location_data_dict = {}
    for (ip_addr, cnt) in ip_rows:
        country, city = lookup_location(ip_addr, reader)
        key = (country, city)
        location_data_dict[key] = location_data_dict.get(key, 0) + cnt
    reader.close()
    location_data = [
        dict(country=k[0], city=k[1], count=v)
        for k, v in location_data_dict.items()
    ]
    location_data.sort(key=lambda x: x['count'], reverse=True)
    location_data = location_data[:20]

    # Convert the top-files rows to a list of dictionaries
    rows = [dict(rel_path=r[0], access_count=r[1]) for r in rows]

    return render_template(
        "dashboard.html",
        timeframe=timeframe,
        rows=rows,
        distinct_device_data=distinct_device_data,
        user_agent_data=user_agent_data,
        folder_data=folder_data,
        location_data=location_data,
        total_accesses=total_accesses,
        unique_files=unique_files,
        unique_user=unique_user,
        timeframe_data=timeframe_data
    )
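
# Illustrative wiring (assumption: an app factory elsewhere imports this
# module; the module name `access_dashboard` and the URL rules below are
# made up for the example):
#
#   from flask import Flask
#   import access_dashboard
#
#   app = Flask(__name__)
#   app.add_url_rule('/connections', view_func=access_dashboard.connections)
#   app.add_url_rule('/dashboard', view_func=access_dashboard.dashboard)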