initial postgres

lelo 2025-03-31 17:37:39 +00:00
parent 71c0585380
commit bc4aa70651
5 changed files with 225 additions and 187 deletions

.gitignore

@@ -4,6 +4,7 @@
 /filecache_image
 /filecache_video
 /filecache_other
+/postgres_data
 /instance
 /__pycache__
 /access_log.db

@@ -1,13 +1,45 @@
 from flask import render_template, request, session
-import sqlite3
-from datetime import datetime, date, timedelta
+from datetime import datetime, timedelta
 import geoip2.database
 from urllib.parse import urlparse, unquote
 from auth import require_secret
+import os
+import threading
+import psycopg2
 
 file_access_temp = []
 
+
+# Thread-safe singleton metaclass.
+class SingletonMeta(type):
+    _instances = {}
+    _lock = threading.Lock()  # Ensures thread safety.
+
+    def __call__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls not in cls._instances:
+                instance = super().__call__(*args, **kwargs)
+                cls._instances[cls] = instance
+        return cls._instances[cls]
+
+
+# Database class that only handles the connection.
+class Database(metaclass=SingletonMeta):
+    def __init__(self):
+        self.dbname = os.environ.get('DB_NAME')
+        self.user = os.environ.get('DB_USER')
+        self.password = os.environ.get('DB_PASSWORD')
+        self.host = os.environ.get('DB_HOST')
+        self.port = int(os.environ.get('DB_PORT', 5432))
+        self.connection = psycopg2.connect(dbname=self.dbname,
+                                           user=self.user,
+                                           password=self.password,
+                                           host=self.host,
+                                           port=self.port)
+        # Enable autocommit so we don't have to call commit() after every transaction.
+        self.connection.autocommit = True
+
+
+# Create a global database instance.
+log_db = Database()
+
 def lookup_location(ip, reader):
     try:
         response = reader.city(ip)
@@ -18,7 +50,7 @@ def lookup_location(ip, reader):
         return "Unknown", "Unknown"
 
 def get_device_type(user_agent):
-    "classify device type based on user agent string"
+    "Classify device type based on user agent string"
     if 'Android' in user_agent:
         return 'Android'
     elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -32,45 +64,37 @@ def get_device_type(user_agent):
     else:
         return 'Other'
 
-def log_file_access(rel_path, ip_address, user_agent, device_id):
-    """
-    Log file access details to a SQLite database.
-    Records the timestamp, full file path, client IP, user agent, and device_id.
-    """
-    global file_access_temp
-    # Connect to the database (this will create the file if it doesn't exist)
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
-    # Create the table if it doesn't exist
-    cursor.execute('''
-        CREATE TABLE IF NOT EXISTS file_access_log (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            timestamp TEXT,
-            rel_path TEXT,
-            ip_address TEXT,
-            user_agent TEXT,
-            device_id TEXT
-        )
-    ''')
-    # Gather information from the request
-    timestamp = datetime.now().isoformat()
-    # Insert the access record into the database
-    cursor.execute('''
-        INSERT INTO file_access_log (timestamp, rel_path, ip_address, user_agent, device_id)
-        VALUES (?, ?, ?, ?, ?)
-    ''', (timestamp, rel_path, ip_address, user_agent, device_id))
-    conn.commit()
-    conn.close()
-    file_access_temp.insert(0, [timestamp, rel_path, ip_address, user_agent, device_id])
-    return return_file_access()
+# Function to initialize the database.
+def init_log_db():
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS file_access_log (
+                id SERIAL PRIMARY KEY,
+                timestamp TIMESTAMP,
+                rel_path TEXT,
+                filesize BIGINT,
+                mime TEXT,
+                ip_address TEXT,
+                user_agent TEXT,
+                device_id TEXT,
+                cached BOOLEAN
+            )
+        ''')
+
+# Logging function that uses the singleton connection.
+def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
+    timestamp = datetime.now()  # Use datetime object directly
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+        ''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
+    return timestamp.isoformat()
 
 def return_file_access():
     global file_access_temp
-    if len(file_access_temp) > 0:
-        # Compute the cutoff time (10 minutes ago from now)
+    if file_access_temp:
         cutoff_time = datetime.now() - timedelta(minutes=10)
-        # Update the list in-place to keep only entries newer than 10 minutes
         file_access_temp[:] = [
             entry for entry in file_access_temp
             if datetime.fromisoformat(entry[0]) >= cutoff_time
@@ -99,106 +123,96 @@ def dashboard():
     else:
         start = now.replace(hour=0, minute=0, second=0, microsecond=0)
 
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
+    with log_db.connection.cursor() as cursor:
 
         # Raw file access counts for the table (top files)
         cursor.execute('''
             SELECT rel_path, COUNT(*) as access_count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY rel_path
             ORDER BY access_count DESC
             LIMIT 20
-        ''', (start.isoformat(),))
+        ''', (start,))
         rows = cursor.fetchall()
 
         # Daily access trend for a line chart
         cursor.execute('''
-            SELECT date(timestamp) as date, COUNT(*) as count
+            SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY date
+            WHERE timestamp >= %s
+            GROUP BY CAST(timestamp AS DATE)
             ORDER BY date
-        ''', (start.isoformat(),))
-        daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()]
+        ''', (start,))
+        daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
 
         # Aggregate download counts by time bucket according to the timeframe.
         if timeframe == 'today':
-            # Group by hour (0-23)
+            # Group by hour using to_char
            cursor.execute('''
-                SELECT strftime('%H', timestamp) as bucket, COUNT(*) as count
+                SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         elif timeframe in ('7days', '30days'):
-            # Group by day (YYYY-MM-DD)
+            # Group by day
             cursor.execute('''
-                SELECT date(timestamp) as bucket, COUNT(*) as count
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         elif timeframe == '365days':
-            # Group by month (YYYY-MM)
+            # Group by month using to_char
             cursor.execute('''
-                SELECT strftime('%Y-%m', timestamp) as bucket, COUNT(*) as count
+                SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         else:
             # Fallback: group by day
             cursor.execute('''
-                SELECT date(timestamp) as bucket, COUNT(*) as count
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
 
         # User agent distribution (aggregate by device type)
         cursor.execute('''
             SELECT user_agent, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY user_agent
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
         device_counts = {}
         for entry in raw_user_agents:
             device = get_device_type(entry['user_agent'])
             device_counts[device] = device_counts.get(device, 0) + entry['count']
-        # Rename to user_agent_data for compatibility with the frontend
         user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
 
         # Parent folder distribution
         cursor.execute('''
             SELECT rel_path, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY rel_path
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         folder_data = {}
         for row in cursor.fetchall():
             rel_path = row[0]
             parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
             folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
-        # Convert the dictionary to a list of dictionaries
-        folder_data = [
-            dict(folder=folder, count=count)
-            for folder, count in folder_data.items()
-        ]
-        # Sort by count in descending order and take the top 10
+        folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
         folder_data.sort(key=lambda x: x['count'], reverse=True)
         folder_data = folder_data[:10]
@@ -206,48 +220,35 @@ def dashboard():
         cursor.execute('''
             SELECT ip_address, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY ip_address
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         ip_rows = cursor.fetchall()
 
-        # Initialize GeoIP2 reader once for efficiency
+        # Summary stats using separate SQL queries
+        cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
+        total_accesses = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_files = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_user = cursor.fetchone()[0]
+
+        # Process location data with GeoIP2.
         reader = geoip2.database.Reader('GeoLite2-City.mmdb')
         location_data = {}
         for ip, count in ip_rows:
             country, city = lookup_location(ip, reader)
             key = (country, city)
-            if key in location_data:
-                location_data[key] += count
-            else:
-                location_data[key] = count
+            location_data[key] = location_data.get(key, 0) + count
         reader.close()
 
-        # Convert the dictionary to a list of dictionaries
-        location_data = [
-            dict(country=key[0], city=key[1], count=value)
-            for key, value in location_data.items()
-        ]
-        # Sort by count in descending order and take the top 20
+        location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
         location_data.sort(key=lambda x: x['count'], reverse=True)
         location_data = location_data[:20]
 
-    # Summary stats using separate SQL queries
-    cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    total_accesses = cursor.fetchone()[0]
-    # Use a separate query to count unique files (distinct rel_path values)
-    cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_files = cursor.fetchone()[0]
-    # Use a separate query to count unique IP addresses
-    cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_user = cursor.fetchone()[0]
-    conn.close()
-
     return render_template("dashboard.html",
                            timeframe=timeframe,
                            rows=rows,
@@ -259,3 +260,6 @@ def dashboard():
                            unique_files=unique_files,
                            unique_user=unique_user,
                            timeframe_data=timeframe_data)
+
+if __name__ == '__main__':
+    init_log_db()
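
For reference, a minimal sketch of how the new helpers are meant to be wired up, assuming the DB_* variables from docker-compose are set and the module above is importable (the module name accesslog below is illustrative, not part of the commit):

# Hypothetical wiring example; "accesslog" stands in for this module's real filename.
import accesslog

# SingletonMeta guarantees a single shared psycopg2 connection per process.
assert accesslog.Database() is accesslog.log_db

# Create the table once at startup; in the commit this only runs under
# `if __name__ == '__main__':`, so a gunicorn worker would have to call it explicitly.
accesslog.init_log_db()

# Log one access with the new signature; values are passed as %s parameters to psycopg2.
ts = accesslog.log_file_access(
    rel_path="music/song.mp3",   # illustrative values
    filesize=4_200_000,
    mime="audio/mpeg",
    ip_address="203.0.113.7",
    user_agent="Mozilla/5.0",
    device_id="abc123",
    cached=False,
)
print(ts)  # ISO-8601 timestamp string returned by log_file_access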

app.py

@@ -197,21 +197,9 @@ def serve_file(subpath):
     mime, _ = mimetypes.guess_type(full_path)
     mime = mime or 'application/octet-stream'
 
-    # logging only for mp3
-    if mime and mime.startswith('audio/mpeg'):
-        # HEAD request are coming in to initiate server caching.
-        # only log initial hits and not the reload of further file parts
-        range_header = request.headers.get('Range')
-        # only request with starting from the beginning of the file will be tracked
-        # no range -> full file not just the first byte
-        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
-            ip_address = request.remote_addr
-            user_agent = request.headers.get('User-Agent')
-            threading.Thread(
-                target=a.log_file_access,
-                args=(subpath, ip_address, user_agent, session['device_id'])
-            ).start()
+    range_header = request.headers.get('Range')
+    ip_address = request.remote_addr
+    user_agent = request.headers.get('User-Agent')
 
     # Check cache first (using diskcache)
     response = None
@@ -231,6 +219,7 @@ def serve_file(subpath):
     if cached:
         cached_file_bytes, mime = cached
         cached_file = io.BytesIO(cached_file_bytes)
+        filesize = len(cached_file.getbuffer())
         response = send_file(cached_file, mimetype=mime)
     else:
         if mime and mime.startswith('image/'):
@@ -245,6 +234,7 @@ def serve_file(subpath):
                 save_kwargs = {'quality': 85}
                 img_bytes_io = io.BytesIO()
                 img.save(img_bytes_io, format=output_format, **save_kwargs)
+                filesize = len(img_bytes_io.getbuffer())
                 thumb_bytes = img_bytes_io.getvalue()
                 cache.set(subpath, (thumb_bytes, output_mime))
@@ -258,13 +248,32 @@ def serve_file(subpath):
                 with open(full_path, 'rb') as f:
                     file_bytes = f.read()
                 cache.set(subpath, (file_bytes, mime))
-                response = send_file(io.BytesIO(file_bytes), mimetype=mime, conditional=True)
+                file_bytes_io = io.BytesIO(file_bytes)
+                filesize = len(file_bytes_io.getbuffer())
+                response = send_file(file_bytes_io, mimetype=mime, conditional=True)
             except Exception as e:
                 app.logger.error(f"Failed to read file {subpath}: {e}")
                 abort(500)
 
     # Set Cache-Control header (browser caching for 1 day)
     response.headers['Cache-Control'] = 'public, max-age=86400'
 
+    if mime and mime.startswith('audio/mpeg'):  # special rules for mp3 files
+        # HEAD requests come in to initiate server caching: ignore them and only log GET requests.
+        # Log the access if there is no Range header, or if the range starts at byte 0 and covers
+        # more than just the first-byte probe (i.e. not "bytes=0-1").
+        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
+            logging = True
+        else:
+            logging = False
+    else:
+        logging = True
+
+    if logging:
+        threading.Thread(
+            target=a.log_file_access,
+            args=(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
+        ).start()
+
     return response
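
To make the new mp3 gating easier to follow, here is a hedged restatement of the same decision as a standalone helper (the helper name should_log is illustrative, not part of the commit):

def should_log(method: str, mime: str, range_header: str | None) -> bool:
    """Illustrative restatement of the logging rule added above.

    Non-mp3 files are always logged. For audio/mpeg, HEAD requests (server cache
    warm-up) are ignored, and ranged GETs are only logged when they start at byte 0
    and cover more than the probe range "bytes=0-1".
    """
    if not (mime and mime.startswith('audio/mpeg')):
        return True
    if method != 'GET':
        return False
    if not range_header:
        return True
    return range_header.startswith("bytes=0-") and range_header != "bytes=0-1"

# A few sample decisions:
assert should_log('GET', 'image/jpeg', None) is True          # non-mp3: always logged
assert should_log('HEAD', 'audio/mpeg', None) is False        # cache warm-up, ignored
assert should_log('GET', 'audio/mpeg', 'bytes=0-1') is False  # first-byte probe, ignored
assert should_log('GET', 'audio/mpeg', 'bytes=0-') is True    # full ranged request from 0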

@@ -1,7 +1,7 @@
 services:
   flask-app:
     image: python:3.11-slim
-    container_name: "${CONTAINER_NAME}"
+    container_name: "${CONTAINER_NAME}.web"
     restart: always
     working_dir: /app
     volumes:
@@ -19,8 +19,16 @@ services:
       - FLASK_ENV=production
       - TITLE_SHORT=${TITLE_SHORT}
       - TITLE_LONG=${TITLE_LONG}
+      - DB_HOST=postgres
+      - DB_PORT=5432
+      - DB_USER=${POSTGRES_USER}
+      - DB_PASSWORD=${POSTGRES_PASSWORD}
+      - DB_NAME=${POSTGRES_DB}
+    depends_on:
+      - postgres
     networks:
       - traefik
+      - internal
     labels:
       - "traefik.enable=true"
@@ -44,6 +52,21 @@ services:
       sh -c "pip install -r requirements.txt &&
             gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
 
+  postgres:
+    image: postgres:15
+    container_name: "${CONTAINER_NAME}.postgres"
+    restart: always
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB}
+    volumes:
+      - ./postgres_data:/var/lib/postgresql/data
+    networks:
+      - internal
+
 networks:
   traefik:
     external: true
+  internal:
+    internal: true
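
A quick way to check the wiring between the two services is a short script run inside the flask-app container; this is an illustrative sketch, assuming the DB_* variables defined above are present in the environment:

# Minimal connectivity check (illustrative; not part of the commit).
import os
import psycopg2

conn = psycopg2.connect(
    dbname=os.environ['DB_NAME'],          # POSTGRES_DB
    user=os.environ['DB_USER'],            # POSTGRES_USER
    password=os.environ['DB_PASSWORD'],    # POSTGRES_PASSWORD
    host=os.environ['DB_HOST'],            # the "postgres" service name on the internal network
    port=int(os.environ.get('DB_PORT', 5432)),
)
with conn.cursor() as cur:
    cur.execute('SELECT version()')
    print(cur.fetchone()[0])               # e.g. "PostgreSQL 15.x ..."
conn.close()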

@@ -6,3 +6,4 @@ diskcache
 geoip2
 gunicorn
 eventlet
+psycopg2-binary