initial postgres

lelo 2025-03-31 17:37:39 +00:00
parent 71c0585380
commit bc4aa70651
5 changed files with 225 additions and 187 deletions

.gitignore

@@ -4,6 +4,7 @@
 /filecache_image
 /filecache_video
 /filecache_other
+/postgres_data
 /instance
 /__pycache__
 /access_log.db


@@ -1,13 +1,45 @@
 from flask import render_template, request, session
-import sqlite3
-from datetime import datetime, date, timedelta
+from datetime import datetime, timedelta
 import geoip2.database
 from urllib.parse import urlparse, unquote
 from auth import require_secret
+import os
+import threading
+import psycopg2
 
 file_access_temp = []
 
+# Thread-safe singleton metaclass.
+class SingletonMeta(type):
+    _instances = {}
+    _lock = threading.Lock()  # Ensures thread safety.
+
+    def __call__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls not in cls._instances:
+                instance = super().__call__(*args, **kwargs)
+                cls._instances[cls] = instance
+        return cls._instances[cls]
+
+# Database class that only handles the connection.
+class Database(metaclass=SingletonMeta):
+    def __init__(self):
+        self.dbname = os.environ.get('DB_NAME')
+        self.user = os.environ.get('DB_USER')
+        self.password = os.environ.get('DB_PASSWORD')
+        self.host = os.environ.get('DB_HOST')
+        self.port = int(os.environ.get('DB_PORT', 5432))
+        self.connection = psycopg2.connect(dbname=self.dbname,
+                                           user=self.user,
+                                           password=self.password,
+                                           host=self.host,
+                                           port=self.port)
+        # Enable autocommit so we don't have to call commit() after every transaction.
+        self.connection.autocommit = True
+
+# Create a global database instance.
+log_db = Database()
+
 def lookup_location(ip, reader):
     try:
         response = reader.city(ip)
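
The SingletonMeta/Database pair above means every import of this module shares one psycopg2 connection per process (with gunicorn -w 1, as in the compose file below, that is one connection for the whole app; more workers would each open their own). A minimal, self-contained sketch of the same pattern, using a dummy class instead of the real Database so it runs without a Postgres server; the names below are illustrative only, not part of the commit:

    import threading

    class SingletonMeta(type):
        _instances = {}
        _lock = threading.Lock()

        def __call__(cls, *args, **kwargs):
            with cls._lock:
                if cls not in cls._instances:
                    cls._instances[cls] = super().__call__(*args, **kwargs)
            return cls._instances[cls]

    class DummyConnection(metaclass=SingletonMeta):
        """Stand-in for the Database class; no real connection is opened."""
        def __init__(self):
            self.opened = True  # __init__ runs only once per process

    a = DummyConnection()
    b = DummyConnection()
    assert a is b  # both names refer to the single shared instance
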
@@ -18,7 +50,7 @@ def lookup_location(ip, reader):
         return "Unknown", "Unknown"
 
 def get_device_type(user_agent):
-    "classify device type based on user agent string"
+    "Classify device type based on user agent string"
    if 'Android' in user_agent:
         return 'Android'
     elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -32,47 +64,39 @@ def get_device_type(user_agent):
     else:
         return 'Other'
 
-def log_file_access(rel_path, ip_address, user_agent, device_id):
-    """
-    Log file access details to a SQLite database.
-    Records the timestamp, full file path, client IP, user agent, and device_id.
-    """
-    global file_access_temp
-    # Connect to the database (this will create the file if it doesn't exist)
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
-    # Create the table if it doesn't exist
-    cursor.execute('''
-        CREATE TABLE IF NOT EXISTS file_access_log (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            timestamp TEXT,
-            rel_path TEXT,
-            ip_address TEXT,
-            user_agent TEXT,
-            device_id TEXT
-        )
-    ''')
-    # Gather information from the request
-    timestamp = datetime.now().isoformat()
-
-    # Insert the access record into the database
-    cursor.execute('''
-        INSERT INTO file_access_log (timestamp, rel_path, ip_address, user_agent, device_id)
-        VALUES (?, ?, ?, ?, ?)
-    ''', (timestamp, rel_path, ip_address, user_agent, device_id))
-    conn.commit()
-    conn.close()
-    file_access_temp.insert(0, [timestamp, rel_path, ip_address, user_agent, device_id])
-    return return_file_access()
+# Function to initialize the database.
+def init_log_db():
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS file_access_log (
+                id SERIAL PRIMARY KEY,
+                timestamp TIMESTAMP,
+                rel_path TEXT,
+                filesize BIGINT,
+                mime TEXT,
+                ip_address TEXT,
+                user_agent TEXT,
+                device_id TEXT,
+                cached BOOLEAN
+            )
+        ''')
+
+# Logging function that uses the singleton connection.
+def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
+    timestamp = datetime.now()  # Use a datetime object directly
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+        ''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
+    return timestamp.isoformat()
 
 def return_file_access():
     global file_access_temp
-    if len(file_access_temp) > 0:
-        # Compute the cutoff time (10 minutes ago from now)
+    if file_access_temp:
         cutoff_time = datetime.now() - timedelta(minutes=10)
-        # Update the list in-place to keep only entries newer than 10 minutes
         file_access_temp[:] = [
             entry for entry in file_access_temp
             if datetime.fromisoformat(entry[0]) >= cutoff_time
         ]
     return file_access_temp
@@ -99,155 +123,132 @@ def dashboard():
     else:
         start = now.replace(hour=0, minute=0, second=0, microsecond=0)
 
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
-    # Raw file access counts for the table (top files)
-    cursor.execute('''
-        SELECT rel_path, COUNT(*) as access_count
-        FROM file_access_log
-        WHERE timestamp >= ?
-        GROUP BY rel_path
-        ORDER BY access_count DESC
-        LIMIT 20
-    ''', (start.isoformat(),))
-    rows = cursor.fetchall()
-    # Daily access trend for a line chart
-    cursor.execute('''
-        SELECT date(timestamp) as date, COUNT(*) as count
-        FROM file_access_log
-        WHERE timestamp >= ?
-        GROUP BY date
-        ORDER BY date
-    ''', (start.isoformat(),))
-    daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()]
-    # Aggregate download counts by time bucket according to the timeframe.
-    if timeframe == 'today':
-        # Group by hour (0-23)
-        cursor.execute('''
-            SELECT strftime('%H', timestamp) as bucket, COUNT(*) as count
-            FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY bucket
-            ORDER BY bucket
-        ''', (start.isoformat(),))
-    elif timeframe in ('7days', '30days'):
-        # Group by day (YYYY-MM-DD)
-        cursor.execute('''
-            SELECT date(timestamp) as bucket, COUNT(*) as count
-            FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY bucket
-            ORDER BY bucket
-        ''', (start.isoformat(),))
-    elif timeframe == '365days':
-        # Group by month (YYYY-MM)
-        cursor.execute('''
-            SELECT strftime('%Y-%m', timestamp) as bucket, COUNT(*) as count
-            FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY bucket
-            ORDER BY bucket
-        ''', (start.isoformat(),))
-    else:
-        # Fallback: group by day
-        cursor.execute('''
-            SELECT date(timestamp) as bucket, COUNT(*) as count
-            FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY bucket
-            ORDER BY bucket
-        ''', (start.isoformat(),))
-    timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
-
-    # User agent distribution (aggregate by device type)
-    cursor.execute('''
-        SELECT user_agent, COUNT(*) as count
-        FROM file_access_log
-        WHERE timestamp >= ?
-        GROUP BY user_agent
-        ORDER BY count DESC
-    ''', (start.isoformat(),))
-    raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
-    device_counts = {}
-    for entry in raw_user_agents:
-        device = get_device_type(entry['user_agent'])
-        device_counts[device] = device_counts.get(device, 0) + entry['count']
-    # Rename to user_agent_data for compatibility with the frontend
-    user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
-
-    # Parent folder distribution
-    cursor.execute('''
-        SELECT rel_path, COUNT(*) as count
-        FROM file_access_log
-        WHERE timestamp >= ?
-        GROUP BY rel_path
-        ORDER BY count DESC
-    ''', (start.isoformat(),))
-    folder_data = {}
-    for row in cursor.fetchall():
-        rel_path = row[0]
-        parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
-        folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
-    # Convert the dictionary to a list of dictionaries
-    folder_data = [
-        dict(folder=folder, count=count)
-        for folder, count in folder_data.items()
-    ]
-    # Sort by count in descending order and take the top 10
-    folder_data.sort(key=lambda x: x['count'], reverse=True)
-    folder_data = folder_data[:10]
-
-    # Aggregate IP addresses with counts
-    cursor.execute('''
-        SELECT ip_address, COUNT(*) as count
-        FROM file_access_log
-        WHERE timestamp >= ?
-        GROUP BY ip_address
-        ORDER BY count DESC
-    ''', (start.isoformat(),))
-    ip_rows = cursor.fetchall()
-
-    # Initialize GeoIP2 reader once for efficiency
+    with log_db.connection.cursor() as cursor:
+        # Raw file access counts for the table (top files)
+        cursor.execute('''
+            SELECT rel_path, COUNT(*) as access_count
+            FROM file_access_log
+            WHERE timestamp >= %s
+            GROUP BY rel_path
+            ORDER BY access_count DESC
+            LIMIT 20
+        ''', (start,))
+        rows = cursor.fetchall()
+
+        # Daily access trend for a line chart
+        cursor.execute('''
+            SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
+            FROM file_access_log
+            WHERE timestamp >= %s
+            GROUP BY CAST(timestamp AS DATE)
+            ORDER BY date
+        ''', (start,))
+        daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
+
+        # Aggregate download counts by time bucket according to the timeframe.
+        if timeframe == 'today':
+            # Group by hour using to_char
+            cursor.execute('''
+                SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
+                FROM file_access_log
+                WHERE timestamp >= %s
+                GROUP BY bucket
+                ORDER BY bucket
+            ''', (start,))
+        elif timeframe in ('7days', '30days'):
+            # Group by day
+            cursor.execute('''
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
+                FROM file_access_log
+                WHERE timestamp >= %s
+                GROUP BY bucket
+                ORDER BY bucket
+            ''', (start,))
+        elif timeframe == '365days':
+            # Group by month using to_char
+            cursor.execute('''
+                SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
+                FROM file_access_log
+                WHERE timestamp >= %s
+                GROUP BY bucket
+                ORDER BY bucket
+            ''', (start,))
+        else:
+            # Fallback: group by day
+            cursor.execute('''
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
+                FROM file_access_log
+                WHERE timestamp >= %s
+                GROUP BY bucket
+                ORDER BY bucket
+            ''', (start,))
+        timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
+
+        # User agent distribution (aggregate by device type)
+        cursor.execute('''
+            SELECT user_agent, COUNT(*) as count
+            FROM file_access_log
+            WHERE timestamp >= %s
+            GROUP BY user_agent
+            ORDER BY count DESC
+        ''', (start,))
+        raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
+        device_counts = {}
+        for entry in raw_user_agents:
+            device = get_device_type(entry['user_agent'])
+            device_counts[device] = device_counts.get(device, 0) + entry['count']
+        user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
+
+        # Parent folder distribution
+        cursor.execute('''
+            SELECT rel_path, COUNT(*) as count
+            FROM file_access_log
+            WHERE timestamp >= %s
+            GROUP BY rel_path
+            ORDER BY count DESC
+        ''', (start,))
+        folder_data = {}
+        for row in cursor.fetchall():
+            rel_path = row[0]
+            parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
+            folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
+        folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
+        folder_data.sort(key=lambda x: x['count'], reverse=True)
+        folder_data = folder_data[:10]
+
+        # Aggregate IP addresses with counts
+        cursor.execute('''
+            SELECT ip_address, COUNT(*) as count
+            FROM file_access_log
+            WHERE timestamp >= %s
+            GROUP BY ip_address
+            ORDER BY count DESC
+        ''', (start,))
+        ip_rows = cursor.fetchall()
+
+        # Summary stats using separate SQL queries
+        cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
+        total_accesses = cursor.fetchone()[0]
+
+        cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_files = cursor.fetchone()[0]
+
+        cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_user = cursor.fetchone()[0]
+
+    # Process location data with GeoIP2.
     reader = geoip2.database.Reader('GeoLite2-City.mmdb')
     location_data = {}
     for ip, count in ip_rows:
         country, city = lookup_location(ip, reader)
         key = (country, city)
-        if key in location_data:
-            location_data[key] += count
-        else:
-            location_data[key] = count
+        location_data[key] = location_data.get(key, 0) + count
     reader.close()
-    # Convert the dictionary to a list of dictionaries
-    location_data = [
-        dict(country=key[0], city=key[1], count=value)
-        for key, value in location_data.items()
-    ]
-    # Sort by count in descending order and take the top 20
+    location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
     location_data.sort(key=lambda x: x['count'], reverse=True)
     location_data = location_data[:20]
 
-    # Summary stats using separate SQL queries
-    cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    total_accesses = cursor.fetchone()[0]
-    # Use a separate query to count unique files (distinct rel_path values)
-    cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_files = cursor.fetchone()[0]
-    # Use a separate query to count unique IP addresses
-    cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_user = cursor.fetchone()[0]
-    conn.close()
-
     return render_template("dashboard.html",
                            timeframe=timeframe,
                            rows=rows,
@@ -258,4 +259,7 @@ def dashboard():
                            total_accesses=total_accesses,
                            unique_files=unique_files,
                            unique_user=unique_user,
                            timeframe_data=timeframe_data)
+
+if __name__ == '__main__':
+    init_log_db()
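
For reference, the new table can be queried through the same singleton connection with the %s parameter style used in the dashboard above. A short sketch, assuming it runs in this module after init_log_db(); the helper name top_files is illustrative, not part of the commit:

    from datetime import datetime, timedelta

    def top_files(hours=24, limit=5):
        """Return the most-accessed rel_paths over the last `hours` hours."""
        since = datetime.now() - timedelta(hours=hours)
        with log_db.connection.cursor() as cursor:
            cursor.execute('''
                SELECT rel_path, COUNT(*) AS access_count
                FROM file_access_log
                WHERE timestamp >= %s
                GROUP BY rel_path
                ORDER BY access_count DESC
                LIMIT %s
            ''', (since, limit))
            return cursor.fetchall()
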

app.py

@@ -186,7 +186,7 @@ def api_browse(subpath):
 
 @app.route("/media/<path:subpath>")
 @auth.require_secret
 def serve_file(subpath):
     root, *relative_parts = subpath.split('/')
     base_path = session['folders'][root]
     full_path = os.path.join(base_path, *relative_parts)
@@ -197,21 +197,9 @@ def serve_file(subpath):
     mime, _ = mimetypes.guess_type(full_path)
     mime = mime or 'application/octet-stream'
 
-    # logging only for mp3
-    if mime and mime.startswith('audio/mpeg'):
-        # HEAD request are coming in to initiate server caching.
-        # only log initial hits and not the reload of further file parts
-        range_header = request.headers.get('Range')
-        # only request with starting from the beginning of the file will be tracked
-        # no range -> full file not just the first byte
-        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
-            ip_address = request.remote_addr
-            user_agent = request.headers.get('User-Agent')
-            threading.Thread(
-                target=a.log_file_access,
-                args=(subpath, ip_address, user_agent, session['device_id'])
-            ).start()
+    range_header = request.headers.get('Range')
+    ip_address = request.remote_addr
+    user_agent = request.headers.get('User-Agent')
 
     # Check cache first (using diskcache)
     response = None
@@ -231,6 +219,7 @@ def serve_file(subpath):
     if cached:
         cached_file_bytes, mime = cached
         cached_file = io.BytesIO(cached_file_bytes)
+        filesize = len(cached_file.getbuffer())
         response = send_file(cached_file, mimetype=mime)
     else:
         if mime and mime.startswith('image/'):
@@ -245,6 +234,7 @@ def serve_file(subpath):
             save_kwargs = {'quality': 85}
 
             img_bytes_io = io.BytesIO()
             img.save(img_bytes_io, format=output_format, **save_kwargs)
+            filesize = len(img_bytes_io.getbuffer())
             thumb_bytes = img_bytes_io.getvalue()
             cache.set(subpath, (thumb_bytes, output_mime))
@@ -258,13 +248,32 @@ def serve_file(subpath):
             with open(full_path, 'rb') as f:
                 file_bytes = f.read()
             cache.set(subpath, (file_bytes, mime))
-            response = send_file(io.BytesIO(file_bytes), mimetype=mime, conditional=True)
+            file_bytes_io = io.BytesIO(file_bytes)
+            filesize = len(file_bytes_io.getbuffer())
+            response = send_file(file_bytes_io, mimetype=mime, conditional=True)
         except Exception as e:
             app.logger.error(f"Failed to read file {subpath}: {e}")
             abort(500)
 
     # Set Cache-Control header (browser caching for 1 day)
     response.headers['Cache-Control'] = 'public, max-age=86400'
 
+    if mime and mime.startswith('audio/mpeg'):  # special rules for mp3 files
+        # HEAD requests come in to initiate server caching: ignore HEAD and only log GET requests.
+        # Log the access if there is no Range header, or if the range starts at byte 0
+        # but covers more than just the first byte (bytes=0-1).
+        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
+            logging = True
+        else:
+            logging = False
+    else:
+        logging = True
+
+    if logging:
+        threading.Thread(
+            target=a.log_file_access,
+            args=(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
+        ).start()
+
     return response
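
The Range-header condition above decides whether an mp3 request counts as a play. Restated as a small pure function with a few sample inputs; this is an illustration of the rule, not code from the commit:

    def should_log_mp3(method, range_header):
        """Mirror of the audio/mpeg logging condition in serve_file."""
        return method == 'GET' and (
            not range_header
            or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")
        )

    assert should_log_mp3('GET', None)              # full-file download
    assert should_log_mp3('GET', 'bytes=0-')        # open-ended range from the start
    assert not should_log_mp3('GET', 'bytes=0-1')   # probe for the first byte only
    assert not should_log_mp3('GET', 'bytes=500-')  # seek into the middle of the file
    assert not should_log_mp3('HEAD', None)         # cache-warming HEAD request
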

docker-compose.yml

@@ -1,7 +1,7 @@
 services:
   flask-app:
     image: python:3.11-slim
-    container_name: "${CONTAINER_NAME}"
+    container_name: "${CONTAINER_NAME}.web"
     restart: always
     working_dir: /app
     volumes:
@@ -19,8 +19,16 @@ services:
       - FLASK_ENV=production
       - TITLE_SHORT=${TITLE_SHORT}
       - TITLE_LONG=${TITLE_LONG}
+      - DB_HOST=postgres
+      - DB_PORT=5432
+      - DB_USER=${POSTGRES_USER}
+      - DB_PASSWORD=${POSTGRES_PASSWORD}
+      - DB_NAME=${POSTGRES_DB}
+    depends_on:
+      - postgres
     networks:
       - traefik
+      - internal
     labels:
       - "traefik.enable=true"
@@ -44,6 +52,21 @@ services:
       sh -c "pip install -r requirements.txt &&
              gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
 
+  postgres:
+    image: postgres:15
+    container_name: "${CONTAINER_NAME}.postgres"
+    restart: always
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB}
+    volumes:
+      - ./postgres_data:/var/lib/postgresql/data
+    networks:
+      - internal
+
 networks:
   traefik:
     external: true
+  internal:
+    internal: true
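
The web container reaches the database by the compose service name (DB_HOST=postgres) over the new internal-only network, and the bind mount ./postgres_data is what the new .gitignore entry keeps out of the repository. A quick connectivity check using the same environment variables might look like the sketch below, assuming it runs inside the flask-app container; it is illustrative, not part of the commit:

    import os
    import psycopg2

    conn = psycopg2.connect(
        dbname=os.environ['DB_NAME'],
        user=os.environ['DB_USER'],
        password=os.environ['DB_PASSWORD'],
        host=os.environ['DB_HOST'],        # resolves to the "postgres" service
        port=int(os.environ.get('DB_PORT', 5432)),
    )
    with conn.cursor() as cursor:
        cursor.execute('SELECT version()')
        print(cursor.fetchone()[0])        # e.g. "PostgreSQL 15.x ..."
    conn.close()
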

requirements.txt

@@ -6,3 +6,4 @@ diskcache
 geoip2
 gunicorn
 eventlet
+psycopg2-binary