initial postgres

lelo 2025-03-31 17:37:39 +00:00
parent 71c0585380
commit bc4aa70651
5 changed files with 225 additions and 187 deletions

.gitignore

@@ -4,6 +4,7 @@
/filecache_image
/filecache_video
/filecache_other
/postgres_data
/instance
/__pycache__
/access_log.db


@@ -1,13 +1,45 @@
from flask import render_template, request, session
import sqlite3
from datetime import datetime, date, timedelta
from datetime import datetime, timedelta
import geoip2.database
from urllib.parse import urlparse, unquote
from auth import require_secret
import os
import threading
import psycopg2
file_access_temp = []
# Thread-safe singleton metaclass.
class SingletonMeta(type):
_instances = {}
_lock = threading.Lock() # Ensures thread safety.
def __call__(cls, *args, **kwargs):
with cls._lock:
if cls not in cls._instances:
instance = super().__call__(*args, **kwargs)
cls._instances[cls] = instance
return cls._instances[cls]
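A quick self-contained sketch of what the metaclass guarantees (not part of the commit; the Config class is illustrative): every instantiation returns the same object, even when first-time construction races between threads.

import threading

class SingletonMeta(type):
    _instances = {}
    _lock = threading.Lock()  # serializes first-time construction

    def __call__(cls, *args, **kwargs):
        with cls._lock:
            if cls not in cls._instances:
                cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]

class Config(metaclass=SingletonMeta):
    def __init__(self):
        self.created = True  # runs only once, for the first caller

assert Config() is Config()  # every call yields the single shared instance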
# Database class that only handles the connection.
class Database(metaclass=SingletonMeta):
def __init__(self):
self.dbname = os.environ.get('DB_NAME')
self.user = os.environ.get('DB_USER')
self.password = os.environ.get('DB_PASSWORD')
self.host = os.environ.get('DB_HOST')
self.port = int(os.environ.get('DB_PORT', 5432))
self.connection = psycopg2.connect(dbname=self.dbname,
user=self.user,
password=self.password,
host=self.host,
port=self.port)
# Enable autocommit so we don't have to call commit() after every transaction.
self.connection.autocommit = True
# Create a global database instance.
log_db = Database()
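Why one shared connection is workable here: psycopg2 connections are documented as shareable between threads, while cursors are not, which is why every helper below opens a short-lived cursor per call. A minimal sketch, assuming the log_db singleton above:

import threading

def probe():
    # each thread opens its own cursor on the shared connection
    with log_db.connection.cursor() as cur:
        cur.execute('SELECT 1')
        cur.fetchone()

workers = [threading.Thread(target=probe) for _ in range(4)]
for w in workers:
    w.start()
for w in workers:
    w.join()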
def lookup_location(ip, reader):
try:
response = reader.city(ip)
@@ -18,7 +50,7 @@ def lookup_location(ip, reader):
return "Unknown", "Unknown"
def get_device_type(user_agent):
"classify device type based on user agent string"
"Classify device type based on user agent string"
if 'Android' in user_agent:
return 'Android'
elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -32,45 +64,37 @@ def get_device_type(user_agent):
else:
return 'Other'
def log_file_access(rel_path, ip_address, user_agent, device_id):
"""
Log file access details to a SQLite database.
Records the timestamp, full file path, client IP, user agent, and device_id.
"""
global file_access_temp
# Connect to the database (this will create the file if it doesn't exist)
conn = sqlite3.connect('access_log.db')
cursor = conn.cursor()
# Create the table if it doesn't exist
# Function to initialize the database.
def init_log_db():
with log_db.connection.cursor() as cursor:
cursor.execute('''
CREATE TABLE IF NOT EXISTS file_access_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
id SERIAL PRIMARY KEY,
timestamp TIMESTAMP,
rel_path TEXT,
filesize BIGINT,
mime TEXT,
ip_address TEXT,
user_agent TEXT,
device_id TEXT
device_id TEXT,
cached BOOLEAN
)
''')
# Gather information from the request
timestamp = datetime.now().isoformat()
# Insert the access record into the database
# Logging function that uses the singleton connection.
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
timestamp = datetime.now() # Use datetime object directly
with log_db.connection.cursor() as cursor:
cursor.execute('''
INSERT INTO file_access_log (timestamp, rel_path, ip_address, user_agent, device_id)
VALUES (?, ?, ?, ?, ?)
''', (timestamp, rel_path, ip_address, user_agent, device_id))
conn.commit()
conn.close()
file_access_temp.insert(0, [timestamp, rel_path, ip_address, user_agent, device_id])
return return_file_access()
INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
return timestamp.isoformat()
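Note the placeholder switch that runs through every query in this commit: psycopg2 uses %s for all parameters (never SQLite's ?) and adapts Python types itself. A small sketch, assuming the log_db singleton and the table created above:

with log_db.connection.cursor() as cursor:
    # count cached hits; True is bound directly to the BOOLEAN column
    cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE cached = %s', (True,))
    print(cursor.fetchone()[0])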
def return_file_access():
global file_access_temp
if len(file_access_temp) > 0:
# Compute the cutoff time (10 minutes ago from now)
if file_access_temp:
cutoff_time = datetime.now() - timedelta(minutes=10)
# Update the list in-place to keep only entries newer than 10 minutes
file_access_temp[:] = [
entry for entry in file_access_temp
if datetime.fromisoformat(entry[0]) >= cutoff_time
@@ -99,106 +123,96 @@ def dashboard():
else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
conn = sqlite3.connect('access_log.db')
cursor = conn.cursor()
with log_db.connection.cursor() as cursor:
# Raw file access counts for the table (top files)
cursor.execute('''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
''', (start.isoformat(),))
''', (start,))
rows = cursor.fetchall()
# Daily access trend for a line chart
cursor.execute('''
SELECT date(timestamp) as date, COUNT(*) as count
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
GROUP BY date
WHERE timestamp >= %s
GROUP BY CAST(timestamp AS DATE)
ORDER BY date
''', (start.isoformat(),))
daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()]
''', (start,))
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
# Aggregate download counts by time bucket according to the timeframe.
if timeframe == 'today':
# Group by hour (0-23)
# Group by hour using to_char
cursor.execute('''
SELECT strftime('%H', timestamp) as bucket, COUNT(*) as count
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY bucket
ORDER BY bucket
''', (start.isoformat(),))
''', (start,))
elif timeframe in ('7days', '30days'):
# Group by day (YYYY-MM-DD)
# Group by day
cursor.execute('''
SELECT date(timestamp) as bucket, COUNT(*) as count
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY bucket
ORDER BY bucket
''', (start.isoformat(),))
''', (start,))
elif timeframe == '365days':
# Group by month (YYYY-MM)
# Group by month using to_char
cursor.execute('''
SELECT strftime('%Y-%m', timestamp) as bucket, COUNT(*) as count
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY bucket
ORDER BY bucket
''', (start.isoformat(),))
''', (start,))
else:
# Fallback: group by day
cursor.execute('''
SELECT date(timestamp) as bucket, COUNT(*) as count
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY bucket
ORDER BY bucket
''', (start.isoformat(),))
''', (start,))
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
# User agent distribution (aggregate by device type)
cursor.execute('''
SELECT user_agent, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY user_agent
ORDER BY count DESC
''', (start.isoformat(),))
''', (start,))
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {}
for entry in raw_user_agents:
device = get_device_type(entry['user_agent'])
device_counts[device] = device_counts.get(device, 0) + entry['count']
# Rename to user_agent_data for compatibility with the frontend
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
# Parent folder distribution
cursor.execute('''
SELECT rel_path, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY rel_path
ORDER BY count DESC
''', (start.isoformat(),))
''', (start,))
folder_data = {}
for row in cursor.fetchall():
rel_path = row[0]
parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
# Convert the dictionary to a list of dictionaries
folder_data = [
dict(folder=folder, count=count)
for folder, count in folder_data.items()
]
# Sort by count in descending order and take the top 10
folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]
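The bucket expressions above are the Postgres equivalents of the removed strftime calls: strftime('%H', ts) becomes to_char(ts, 'HH24'), date(ts) becomes CAST(ts AS DATE), and strftime('%Y-%m', ts) becomes to_char(ts, 'YYYY-MM'). Because to_char returns zero-padded, fixed-width text, ORDER BY bucket still sorts chronologically. A standalone check of the three expressions (a sketch, reusing the singleton connection):

conn = log_db.connection  # assumption: the shared connection from above
with conn.cursor() as cur:
    cur.execute("""
        SELECT to_char(ts, 'HH24'), CAST(ts AS DATE), to_char(ts, 'YYYY-MM')
        FROM (SELECT TIMESTAMP '2025-03-31 17:37:39' AS ts) AS t
    """)
    print(cur.fetchone())  # ('17', datetime.date(2025, 3, 31), '2025-03')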
@@ -206,48 +220,35 @@ def dashboard():
cursor.execute('''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ?
WHERE timestamp >= %s
GROUP BY ip_address
ORDER BY count DESC
''', (start.isoformat(),))
''', (start,))
ip_rows = cursor.fetchall()
# Initialize GeoIP2 reader once for efficiency
# Summary stats using separate SQL queries
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
total_accesses = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
unique_files = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
unique_user = cursor.fetchone()[0]
# Process location data with GeoIP2.
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data = {}
for ip, count in ip_rows:
country, city = lookup_location(ip, reader)
key = (country, city)
if key in location_data:
location_data[key] += count
else:
location_data[key] = count
location_data[key] = location_data.get(key, 0) + count
reader.close()
# Convert the dictionary to a list of dictionaries
location_data = [
dict(country=key[0], city=key[1], count=value)
for key, value in location_data.items()
]
# Sort by count in descending order and take the top 20
location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
location_data.sort(key=lambda x: x['count'], reverse=True)
location_data = location_data[:20]
# Summary stats using separate SQL queries
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
total_accesses = cursor.fetchone()[0]
# Use a separate query to count unique files (distinct rel_path values)
cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
unique_files = cursor.fetchone()[0]
# Use a separate query to count unique IP addresses
cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
unique_user = cursor.fetchone()[0]
conn.close()
return render_template("dashboard.html",
timeframe=timeframe,
rows=rows,
@@ -259,3 +260,6 @@ def dashboard():
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data)
if __name__ == '__main__':
init_log_db()
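One caveat with the __main__ guard: init_log_db() only runs when this module is executed directly, so the Flask app also needs to call it once at startup, unless it already does so somewhere not shown in this diff. A sketch of that call site (the module alias a is an assumption, taken from app.py's a.log_file_access calls):

import a  # hypothetical import name for this analytics module

a.init_log_db()  # ensure file_access_log exists before the first request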

app.py

@@ -197,21 +197,9 @@ def serve_file(subpath):
mime, _ = mimetypes.guess_type(full_path)
mime = mime or 'application/octet-stream'
# logging only for mp3
if mime and mime.startswith('audio/mpeg'):
# HEAD requests come in to initiate server caching.
# only log initial hits, not reloads of further file parts
range_header = request.headers.get('Range')
# only requests starting from the beginning of the file are tracked
# no Range header -> full file, not just the first byte
if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
threading.Thread(
target=a.log_file_access,
args=(subpath, ip_address, user_agent, session['device_id'])
).start()
# Check cache first (using diskcache)
response = None
@@ -231,6 +219,7 @@ def serve_file(subpath):
if cached:
cached_file_bytes, mime = cached
cached_file = io.BytesIO(cached_file_bytes)
filesize = len(cached_file.getbuffer())
response = send_file(cached_file, mimetype=mime)
else:
if mime and mime.startswith('image/'):
@@ -245,6 +234,7 @@ def serve_file(subpath):
save_kwargs = {'quality': 85}
img_bytes_io = io.BytesIO()
img.save(img_bytes_io, format=output_format, **save_kwargs)
filesize = len(img_bytes_io.getbuffer())
thumb_bytes = img_bytes_io.getvalue()
cache.set(subpath, (thumb_bytes, output_mime))
@@ -258,13 +248,32 @@ def serve_file(subpath):
with open(full_path, 'rb') as f:
file_bytes = f.read()
cache.set(subpath, (file_bytes, mime))
response = send_file(io.BytesIO(file_bytes), mimetype=mime, conditional=True)
file_bytes_io = io.BytesIO(file_bytes)
filesize = len(file_bytes_io.getbuffer())
response = send_file(file_bytes_io, mimetype=mime, conditional=True)
except Exception as e:
app.logger.error(f"Failed to read file {subpath}: {e}")
abort(500)
# Set Cache-Control header (browser caching for 1 day)
response.headers['Cache-Control'] = 'public, max-age=86400'
if mime and mime.startswith('audio/mpeg'): # special rules for mp3 files
# HEAD requests come in to initiate server caching; ignore them and log only GET requests.
# Log access if there is no Range header, or if the range starts at byte 0 and covers more than the two-byte "bytes=0-1" probe.
if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
logging = True
else:
logging = False
else:
logging = True
if logging:
threading.Thread(
target=a.log_file_access,
args=(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached), )
).start()
return response
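The mp3 branch of that logging decision is easy to misread, so here it is pulled out as a standalone predicate (a sketch; the function name is illustrative) together with the cases it separates:

def should_log_mp3(method, range_header):
    if method != 'GET':
        return False  # HEAD requests only prime the server-side cache
    if not range_header:
        return True   # no Range header: a full-file request
    # "bytes=0-..." means playback from the start, except the two-byte
    # probe "bytes=0-1" that some players send before streaming
    return range_header.startswith('bytes=0-') and range_header != 'bytes=0-1'

assert should_log_mp3('GET', None)
assert should_log_mp3('GET', 'bytes=0-')
assert not should_log_mp3('GET', 'bytes=0-1')
assert not should_log_mp3('GET', 'bytes=131072-')
assert not should_log_mp3('HEAD', None)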

docker-compose.yml

@@ -1,7 +1,7 @@
services:
flask-app:
image: python:3.11-slim
container_name: "${CONTAINER_NAME}"
container_name: "${CONTAINER_NAME}.web"
restart: always
working_dir: /app
volumes:
@@ -19,8 +19,16 @@ services:
- FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG}
- DB_HOST=postgres
- DB_PORT=5432
- DB_USER=${POSTGRES_USER}
- DB_PASSWORD=${POSTGRES_PASSWORD}
- DB_NAME=${POSTGRES_DB}
depends_on:
- postgres
networks:
- traefik
- internal
labels:
- "traefik.enable=true"
@@ -44,6 +52,21 @@ services:
sh -c "pip install -r requirements.txt &&
gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
postgres:
image: postgres:15
container_name: "${CONTAINER_NAME}.postgres"
restart: always
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: ${POSTGRES_DB}
volumes:
- ./postgres_data:/var/lib/postgresql/data
networks:
- internal
networks:
traefik:
external: true
internal:
internal: true
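One operational note: depends_on only orders container startup; it does not wait for Postgres to accept connections, so the flask-app can boot before the database is ready. A common guard is a small retry loop around the connect call, sketched here in Python with the same environment variables the compose file injects:

import os
import time

import psycopg2

def connect_with_retry(retries=10, delay=2.0):
    # retry until Postgres accepts connections (a sketch, not in the commit)
    for attempt in range(retries):
        try:
            return psycopg2.connect(
                dbname=os.environ.get('DB_NAME'),
                user=os.environ.get('DB_USER'),
                password=os.environ.get('DB_PASSWORD'),
                host=os.environ.get('DB_HOST'),
                port=int(os.environ.get('DB_PORT', 5432)),
            )
        except psycopg2.OperationalError:
            if attempt == retries - 1:
                raise
            time.sleep(delay)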

requirements.txt

@@ -6,3 +6,4 @@ diskcache
geoip2
gunicorn
eventlet
psycopg2-binary