back to sqlite

parent 4db37c49ff
commit 76eca80a4a

analytics.py (394 changed lines)
@@ -1,37 +1,27 @@
import sqlite3
from flask import render_template, request
from datetime import datetime, timedelta
import geoip2.database
from auth import require_secret
import os
import psycopg2

file_access_temp = []

dbname = os.environ.get('DB_NAME')
user = os.environ.get('DB_USER')
password = os.environ.get('DB_PASSWORD')
host = os.environ.get('DB_HOST')
port = int(os.environ.get('DB_PORT', 5432))
# Example database name; you can change to whatever you want:
DB_NAME = 'access_log.db'

connection = psycopg2.connect(dbname=dbname,
user=user,
password=password,
host=host,
port=port
)
# Enable autocommit
connection.autocommit = True
log_db = connection
# Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)

# Function to initialize the database.
def init_log_db():
with log_db.cursor() as cursor:
cursor.execute('''
"""Create the file_access_log table if it doesn't already exist."""
with log_db:
log_db.execute('''
CREATE TABLE IF NOT EXISTS file_access_log (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMP,
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
rel_path TEXT,
filesize BIGINT,
filesize INTEGER,
mime TEXT,
ip_address TEXT,
user_agent TEXT,
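The new code relies on sqlite3's connection-as-context-manager idiom (with log_db:), which wraps the enclosed statements in a transaction, commits on a clean exit, and rolls back if the block raises; check_same_thread=False only relaxes the module's thread-ownership check. A minimal standalone sketch of that idiom, using a hypothetical scratch database rather than the project's access_log.db:

import sqlite3

# Hypothetical throwaway database, purely for illustration.
conn = sqlite3.connect('example.db', check_same_thread=False)

with conn:  # BEGIN ... COMMIT on success, ROLLBACK if the block raises
    conn.execute('CREATE TABLE IF NOT EXISTS demo (id INTEGER PRIMARY KEY AUTOINCREMENT, note TEXT)')
    conn.execute('INSERT INTO demo (note) VALUES (?)', ('hello',))

# The context manager commits but does not close; reads can reuse the same connection.
print(conn.execute('SELECT COUNT(*) FROM demo').fetchone()[0])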
@@ -42,8 +32,6 @@ def init_log_db():

init_log_db()



def lookup_location(ip, reader):
try:
response = reader.city(ip)
@@ -54,7 +42,7 @@ def lookup_location(ip, reader):
return "Unknown", "Unknown"

def get_device_type(user_agent):
"Classify device type based on user agent string"
"""Classify device type based on user agent string."""
if 'Android' in user_agent:
return 'Android'
elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -68,22 +56,30 @@ def get_device_type(user_agent):
else:
return 'Other'

# Logging function that uses the singleton connection.
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
"""Insert a file access record into the database."""
global file_access_temp
timestamp = datetime.now() # Use datetime object directly
with log_db.connection.cursor() as cursor:
cursor.execute('''
INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
file_access_temp.insert(0, [timestamp.isoformat(), rel_path, filesize, mime, ip_address, user_agent, device_id, cached])
return timestamp.isoformat()
timestamp = datetime.now() # a datetime object

# Store the ISO timestamp in the database for easy lexical comparison
iso_ts = timestamp.isoformat()

with log_db:
log_db.execute('''
INSERT INTO file_access_log
(timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached])

return iso_ts

def return_file_access():
"""Return recent file access logs from memory (the last 10 minutes)."""
global file_access_temp
if file_access_temp:
cutoff_time = datetime.now() - timedelta(minutes=10)
# Convert each stored timestamp (ISO string) back to datetime
file_access_temp[:] = [
entry for entry in file_access_temp
if datetime.fromisoformat(entry[0]) >= cutoff_time
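log_file_access now stores the timestamp as TEXT via isoformat(), and the comment above explains why: ISO 8601 strings sort lexically in the same order as the datetimes they encode (as long as every row uses the same format), so the dashboard's string comparisons against timestamp work without date parsing. A small self-contained check of that assumption:

from datetime import datetime, timedelta

now = datetime(2025, 3, 1, 12, 30, 0)
earlier = now - timedelta(days=2)

a, b = earlier.isoformat(), now.isoformat()
# '2025-02-27T12:30:00' < '2025-03-01T12:30:00' as plain strings,
# matching the chronological order of the underlying datetimes.
assert (a < b) == (earlier < now)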
@@ -105,186 +101,234 @@ def dashboard():
# Determine which file type we're filtering by.
filetype = 'other'

allowed_list = ['mp3', 'wav', 'audio']
if filetype_arg.lower() in allowed_list:
# Some simplistic sets to decide how we match the MIME type
audio_list = ['mp3', 'wav', 'audio']
image_list = ['jpg', 'jpeg', 'image', 'photo']
video_list = ['mp4', 'mov', 'wmv', 'avi']

if filetype_arg.lower() in audio_list:
filetype = 'audio/'

allowed_list = ['jpg', 'jpeg', 'image', 'photo']
if filetype_arg.lower() in allowed_list:
elif filetype_arg.lower() in image_list:
filetype = 'image/'

allowed_list = ['mp4', 'mov', 'wmv', 'avi']
if filetype_arg.lower() in allowed_list:
elif filetype_arg.lower() in video_list:
filetype = 'video/'



# Determine the start time based on timeframe.
# Determine start time based on timeframe
if timeframe == 'today':
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
elif timeframe == '7days':
start = now - timedelta(days=7)
start_dt = now - timedelta(days=7)
elif timeframe == '30days':
start = now - timedelta(days=30)
start_dt = now - timedelta(days=30)
elif timeframe == '365days':
start = now - timedelta(days=365)
start_dt = now - timedelta(days=365)
else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
# Build the SQL filter for mime
# We'll compare the textual timestamp (ISO 8601).
start_str = start_dt.isoformat()

# Build the SQL filter
if filetype == 'other':
# Exclude audio, image, and video mimes
filetype_filter_sql = "AND mime NOT LIKE 'audio/%' AND mime NOT LIKE 'image/%' AND mime NOT LIKE 'video/%'"
params = (start,)
# Exclude audio, image, video
filetype_filter_sql = (
"AND mime NOT LIKE 'audio/%' "
"AND mime NOT LIKE 'image/%' "
"AND mime NOT LIKE 'video/%' "
)
params_for_filter = (start_str,)
else:
# Filter for mimes that start with the given type.
filetype_filter_sql = "AND mime LIKE %s"
params = (start, filetype + '%')
# Filter for mimes that start with the given type
filetype_filter_sql = "AND mime LIKE ?"
params_for_filter = (start_str, filetype + '%')
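Note the placeholder change in this block: psycopg2 binds parameters with %s, while sqlite3 uses qmark (?) placeholders, with values matched positionally (start timestamp first, then the optional LIKE pattern). A minimal, self-contained sketch of how the assembled filter and parameter tuple line up, using made-up values and an in-memory table:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE file_access_log (timestamp TEXT, mime TEXT)')
conn.execute("INSERT INTO file_access_log VALUES ('2025-03-02T10:00:00', 'audio/mpeg')")

# Hypothetical values mirroring the variables built above.
start_str = '2025-03-01T00:00:00'
filetype_filter_sql = "AND mime LIKE ?"
params_for_filter = (start_str, 'audio/' + '%')

query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ? {filetype_filter_sql}'
print(conn.execute(query, params_for_filter).fetchone()[0])  # -> 1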
with log_db.connection.cursor() as cursor:
# Raw file access counts (top files)
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
'''
cursor.execute(query, params)
# 1. Top files by access count
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
rows = cursor.fetchall()

# Daily access trend for a line chart
query = f'''
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY CAST(timestamp AS DATE)
ORDER BY date
'''
cursor.execute(query, params)
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
# 2. Daily access trend (line chart)
# We'll group by day using substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT substr(timestamp, 1, 10) AS date, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY date
ORDER BY date
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
daily_rows = cursor.fetchall()
daily_access_data = [
dict(date=r[0], count=r[1]) for r in daily_rows
]
# Aggregate download counts by time bucket according to the timeframe.
if timeframe == 'today':
query = f'''
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
elif timeframe in ('7days', '30days'):
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
elif timeframe == '365days':
query = f'''
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
else:
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]

# User agent distribution (aggregate by device type)
# 3. Timeframe-based aggregation
# We'll group by hour if "today", by day if "7days"/"30days", by month if "365days".
if timeframe == 'today':
# Hour: substr(timestamp, 12, 2) -> HH
query = f'''
SELECT user_agent, COUNT(*) as count
SELECT substr(timestamp, 12, 2) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {}
for entry in raw_user_agents:
device = get_device_type(entry['user_agent'])
device_counts[device] = device_counts.get(device, 0) + entry['count']
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]

# Parent folder distribution
elif timeframe in ('7days', '30days'):
# Day: substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT rel_path, COUNT(*) as count
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
folder_data = {}
for row in cursor.fetchall():
rel_path = row[0]
parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]

# Aggregate IP addresses with counts
elif timeframe == '365days':
# Month: substr(timestamp, 1, 7) -> YYYY-MM
query = f'''
SELECT ip_address, COUNT(*) as count
SELECT substr(timestamp, 1, 7) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
else:
# Default: group by day
query = f'''
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
timeframe_data_rows = cursor.fetchall()
timeframe_data = [
dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
]
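The hour/day/month buckets above all come from slicing the stored ISO string with SQLite's 1-based substr(): characters 1-10 give YYYY-MM-DD, 12-13 give the hour (position 11 is the 'T' separator), and 1-7 give YYYY-MM. A quick illustration against an in-memory connection, using a made-up timestamp in the same isoformat() shape:

import sqlite3

conn = sqlite3.connect(':memory:')
ts = '2025-03-01T13:45:30.123456'  # shape of the isoformat() strings stored above
day, hour, month = conn.execute(
    'SELECT substr(?, 1, 10), substr(?, 12, 2), substr(?, 1, 7)',
    (ts, ts, ts),
).fetchone()
print(day, hour, month)  # 2025-03-01 13 2025-03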
# 4. User agent distribution
query = f'''
SELECT user_agent, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
raw_user_agents = cursor.fetchall()
device_counts = {}
for (ua, cnt) in raw_user_agents:
device = get_device_type(ua)
device_counts[device] = device_counts.get(device, 0) + cnt
user_agent_data = [
dict(device=d, count=c) for d, c in device_counts.items()
]

# 5. Parent folder distribution
query = f'''
SELECT rel_path, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
'''
folder_data_dict = {}
with log_db:
cursor = log_db.execute(query, params_for_filter)
for (rp, c) in cursor.fetchall():
if '/' in rp:
parent_folder = rp.rsplit('/', 1)[0]
else:
parent_folder = "Root"
folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c
folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]

# 6. Aggregate IP addresses with counts
query = f'''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
ip_rows = cursor.fetchall()
# Summary stats using separate SQL queries
query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# 7. Summary stats
# total_accesses
query = f'''
SELECT COUNT(*)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
total_accesses = cursor.fetchone()[0]

query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# unique_files
query = f'''
SELECT COUNT(DISTINCT rel_path)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_files = cursor.fetchone()[0]

query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# unique_user
query = f'''
SELECT COUNT(DISTINCT device_id)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_user = cursor.fetchone()[0]

# Process location data with GeoIP2.
# 8. Process location data with GeoIP2
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data = {}
for ip, count in ip_rows:
country, city = lookup_location(ip, reader)
location_data_dict = {}
for (ip_addr, cnt) in ip_rows:
country, city = lookup_location(ip_addr, reader)
key = (country, city)
location_data[key] = location_data.get(key, 0) + count
location_data_dict[key] = location_data_dict.get(key, 0) + cnt
reader.close()

location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
location_data = [
dict(country=k[0], city=k[1], count=v)
for k, v in location_data_dict.items()
]
location_data.sort(key=lambda x: x['count'], reverse=True)
location_data = location_data[:20]

return render_template("dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
user_agent_data=user_agent_data,
folder_data=folder_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data)

# Convert the top-files rows to a list of dictionaries
# (just for consistency in passing to template).
rows = [dict(rel_path=r[0], access_count=r[1]) for r in rows]

return render_template(
"dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
user_agent_data=user_agent_data,
folder_data=folder_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data
)
@@ -19,15 +19,8 @@ services:
- FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG}
- DB_HOST=postgres-db
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_NAME=${DB_NAME}
depends_on:
- "postgres"
networks:
- traefik
- internal
labels:
- "traefik.enable=true"

@@ -51,22 +44,7 @@ services:
sh -c "pip install -r requirements.txt &&
gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"

postgres:
image: postgres:17
restart: always
environment:
POSTGRES_USER: ${DB_USER:?}
POSTGRES_PASSWORD: ${DB_PASSWORD:?}
POSTGRES_DB: ${DB_NAME:?}
volumes:
- ./postgres_data:/var/lib/postgresql/data
networks:
internal:
aliases:
- postgres-db

networks:
traefik:
external: true
internal:
internal: true
@@ -6,4 +6,3 @@ diskcache
geoip2
gunicorn
eventlet
psycopg2-binary