initial postgres

lelo 2025-03-31 17:37:39 +00:00
parent 71c0585380
commit bc4aa70651
5 changed files with 225 additions and 187 deletions

.gitignore

@@ -4,6 +4,7 @@
 /filecache_image
 /filecache_video
 /filecache_other
+/postgres_data
 /instance
 /__pycache__
 /access_log.db

@@ -1,13 +1,45 @@
 from flask import render_template, request, session
-import sqlite3
-from datetime import datetime, date, timedelta
+from datetime import datetime, timedelta
 import geoip2.database
 from urllib.parse import urlparse, unquote
 from auth import require_secret
+import os
+import threading
+import psycopg2
 
 file_access_temp = []
 
+
+# Thread-safe singleton metaclass.
+class SingletonMeta(type):
+    _instances = {}
+    _lock = threading.Lock()  # Ensures thread safety.
+
+    def __call__(cls, *args, **kwargs):
+        with cls._lock:
+            if cls not in cls._instances:
+                instance = super().__call__(*args, **kwargs)
+                cls._instances[cls] = instance
+        return cls._instances[cls]
+
+
+# Database class that only handles the connection.
+class Database(metaclass=SingletonMeta):
+    def __init__(self):
+        self.dbname = os.environ.get('DB_NAME')
+        self.user = os.environ.get('DB_USER')
+        self.password = os.environ.get('DB_PASSWORD')
+        self.host = os.environ.get('DB_HOST')
+        self.port = int(os.environ.get('DB_PORT', 5432))
+        self.connection = psycopg2.connect(dbname=self.dbname,
+                                           user=self.user,
+                                           password=self.password,
+                                           host=self.host,
+                                           port=self.port)
+        # Enable autocommit so we don't have to call commit() after every transaction.
+        self.connection.autocommit = True
+
+
+# Create a global database instance.
+log_db = Database()
+
 def lookup_location(ip, reader):
     try:
         response = reader.city(ip)
@@ -18,7 +50,7 @@ def lookup_location(ip, reader):
         return "Unknown", "Unknown"
 
 def get_device_type(user_agent):
-    "classify device type based on user agent string"
+    "Classify device type based on user agent string"
     if 'Android' in user_agent:
         return 'Android'
     elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -32,45 +64,37 @@ def get_device_type(user_agent):
     else:
         return 'Other'
 
-def log_file_access(rel_path, ip_address, user_agent, device_id):
-    """
-    Log file access details to a SQLite database.
-    Records the timestamp, full file path, client IP, user agent, and device_id.
-    """
-    global file_access_temp
-    # Connect to the database (this will create the file if it doesn't exist)
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
-    # Create the table if it doesn't exist
-    cursor.execute('''
-        CREATE TABLE IF NOT EXISTS file_access_log (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            timestamp TEXT,
-            rel_path TEXT,
-            ip_address TEXT,
-            user_agent TEXT,
-            device_id TEXT
-        )
-    ''')
-    # Gather information from the request
-    timestamp = datetime.now().isoformat()
-    # Insert the access record into the database
-    cursor.execute('''
-        INSERT INTO file_access_log (timestamp, rel_path, ip_address, user_agent, device_id)
-        VALUES (?, ?, ?, ?, ?)
-    ''', (timestamp, rel_path, ip_address, user_agent, device_id))
-    conn.commit()
-    conn.close()
-    file_access_temp.insert(0, [timestamp, rel_path, ip_address, user_agent, device_id])
-    return return_file_access()
+# Function to initialize the database.
+def init_log_db():
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS file_access_log (
+                id SERIAL PRIMARY KEY,
+                timestamp TIMESTAMP,
+                rel_path TEXT,
+                filesize BIGINT,
+                mime TEXT,
+                ip_address TEXT,
+                user_agent TEXT,
+                device_id TEXT,
+                cached BOOLEAN
+            )
+        ''')
+
+# Logging function that uses the singleton connection.
+def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
+    timestamp = datetime.now()  # Use datetime object directly
+    with log_db.connection.cursor() as cursor:
+        cursor.execute('''
+            INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+        ''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
+    return timestamp.isoformat()
 
 def return_file_access():
     global file_access_temp
-    if len(file_access_temp) > 0:
-        # Compute the cutoff time (10 minutes ago from now)
+    if file_access_temp:
         cutoff_time = datetime.now() - timedelta(minutes=10)
-        # Update the list in-place to keep only entries newer than 10 minutes
         file_access_temp[:] = [
             entry for entry in file_access_temp
             if datetime.fromisoformat(entry[0]) >= cutoff_time
@@ -99,106 +123,96 @@ def dashboard():
     else:
         start = now.replace(hour=0, minute=0, second=0, microsecond=0)
 
-    conn = sqlite3.connect('access_log.db')
-    cursor = conn.cursor()
+    with log_db.connection.cursor() as cursor:
 
         # Raw file access counts for the table (top files)
         cursor.execute('''
             SELECT rel_path, COUNT(*) as access_count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY rel_path
             ORDER BY access_count DESC
             LIMIT 20
-        ''', (start.isoformat(),))
+        ''', (start,))
         rows = cursor.fetchall()
 
         # Daily access trend for a line chart
         cursor.execute('''
-            SELECT date(timestamp) as date, COUNT(*) as count
+            SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
-            GROUP BY date
+            WHERE timestamp >= %s
+            GROUP BY CAST(timestamp AS DATE)
             ORDER BY date
-        ''', (start.isoformat(),))
-        daily_access_data = [dict(date=row[0], count=row[1]) for row in cursor.fetchall()]
+        ''', (start,))
+        daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
 
         # Aggregate download counts by time bucket according to the timeframe.
         if timeframe == 'today':
-            # Group by hour (0-23)
+            # Group by hour using to_char
            cursor.execute('''
-                SELECT strftime('%H', timestamp) as bucket, COUNT(*) as count
+                SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         elif timeframe in ('7days', '30days'):
-            # Group by day (YYYY-MM-DD)
+            # Group by day
             cursor.execute('''
-                SELECT date(timestamp) as bucket, COUNT(*) as count
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         elif timeframe == '365days':
-            # Group by month (YYYY-MM)
+            # Group by month using to_char
             cursor.execute('''
-                SELECT strftime('%Y-%m', timestamp) as bucket, COUNT(*) as count
+                SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         else:
             # Fallback: group by day
             cursor.execute('''
-                SELECT date(timestamp) as bucket, COUNT(*) as count
+                SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
                 FROM file_access_log
-                WHERE timestamp >= ?
+                WHERE timestamp >= %s
                 GROUP BY bucket
                 ORDER BY bucket
-            ''', (start.isoformat(),))
+            ''', (start,))
         timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
 
         # User agent distribution (aggregate by device type)
         cursor.execute('''
             SELECT user_agent, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY user_agent
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
         device_counts = {}
         for entry in raw_user_agents:
             device = get_device_type(entry['user_agent'])
             device_counts[device] = device_counts.get(device, 0) + entry['count']
-        # Rename to user_agent_data for compatibility with the frontend
         user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
 
         # Parent folder distribution
         cursor.execute('''
             SELECT rel_path, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY rel_path
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         folder_data = {}
         for row in cursor.fetchall():
             rel_path = row[0]
             parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
             folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
-        # Convert the dictionary to a list of dictionaries
-        folder_data = [
-            dict(folder=folder, count=count)
-            for folder, count in folder_data.items()
-        ]
-        # Sort by count in descending order and take the top 10
+        folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
         folder_data.sort(key=lambda x: x['count'], reverse=True)
         folder_data = folder_data[:10]
@@ -206,48 +220,35 @@ def dashboard():
         cursor.execute('''
             SELECT ip_address, COUNT(*) as count
             FROM file_access_log
-            WHERE timestamp >= ?
+            WHERE timestamp >= %s
             GROUP BY ip_address
             ORDER BY count DESC
-        ''', (start.isoformat(),))
+        ''', (start,))
         ip_rows = cursor.fetchall()
 
-        # Initialize GeoIP2 reader once for efficiency
+        # Summary stats using separate SQL queries
+        cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
+        total_accesses = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_files = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
+        unique_user = cursor.fetchone()[0]
+
+        # Process location data with GeoIP2.
         reader = geoip2.database.Reader('GeoLite2-City.mmdb')
         location_data = {}
         for ip, count in ip_rows:
             country, city = lookup_location(ip, reader)
             key = (country, city)
-            if key in location_data:
-                location_data[key] += count
-            else:
-                location_data[key] = count
+            location_data[key] = location_data.get(key, 0) + count
         reader.close()
 
-        # Convert the dictionary to a list of dictionaries
-        location_data = [
-            dict(country=key[0], city=key[1], count=value)
-            for key, value in location_data.items()
-        ]
-        # Sort by count in descending order and take the top 20
+        location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
         location_data.sort(key=lambda x: x['count'], reverse=True)
         location_data = location_data[:20]
 
-    # Summary stats using separate SQL queries
-    cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    total_accesses = cursor.fetchone()[0]
-    # Use a separate query to count unique files (distinct rel_path values)
-    cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_files = cursor.fetchone()[0]
-    # Use a separate query to count unique IP addresses
-    cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= ?', (start.isoformat(),))
-    unique_user = cursor.fetchone()[0]
-    conn.close()
-
     return render_template("dashboard.html",
                            timeframe=timeframe,
                            rows=rows,
@@ -259,3 +260,6 @@ def dashboard():
                            unique_files=unique_files,
                            unique_user=unique_user,
                            timeframe_data=timeframe_data)
+
+if __name__ == '__main__':
+    init_log_db()
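
For reference, a minimal sketch of how the new helpers are meant to be wired up, assuming the DB_* variables from docker-compose are set and the module above is importable (the module name accesslog below is illustrative, not part of the commit):

# Hypothetical wiring example; "accesslog" stands in for this module's real filename.
import accesslog

# SingletonMeta guarantees a single shared psycopg2 connection per process.
assert accesslog.Database() is accesslog.log_db

# Create the table once at startup; in the commit this only runs under
# `if __name__ == '__main__':`, so a gunicorn worker would have to call it explicitly.
accesslog.init_log_db()

# Log one access with the new signature; values are passed as %s parameters to psycopg2.
ts = accesslog.log_file_access(
    rel_path="music/song.mp3",   # illustrative values
    filesize=4_200_000,
    mime="audio/mpeg",
    ip_address="203.0.113.7",
    user_agent="Mozilla/5.0",
    device_id="abc123",
    cached=False,
)
print(ts)  # ISO-8601 timestamp string returned by log_file_access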

app.py

@@ -197,21 +197,9 @@ def serve_file(subpath):
     mime, _ = mimetypes.guess_type(full_path)
     mime = mime or 'application/octet-stream'
 
-    # logging only for mp3
-    if mime and mime.startswith('audio/mpeg'):
-        # HEAD request are coming in to initiate server caching.
-        # only log initial hits and not the reload of further file parts
-        range_header = request.headers.get('Range')
-        # only request with starting from the beginning of the file will be tracked
-        # no range -> full file not just the first byte
-        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
-            ip_address = request.remote_addr
-            user_agent = request.headers.get('User-Agent')
-            threading.Thread(
-                target=a.log_file_access,
-                args=(subpath, ip_address, user_agent, session['device_id'])
-            ).start()
+    range_header = request.headers.get('Range')
+    ip_address = request.remote_addr
+    user_agent = request.headers.get('User-Agent')
 
     # Check cache first (using diskcache)
     response = None
@@ -231,6 +219,7 @@ def serve_file(subpath):
     if cached:
         cached_file_bytes, mime = cached
         cached_file = io.BytesIO(cached_file_bytes)
+        filesize = len(cached_file.getbuffer())
         response = send_file(cached_file, mimetype=mime)
     else:
         if mime and mime.startswith('image/'):
@@ -245,6 +234,7 @@ def serve_file(subpath):
                 save_kwargs = {'quality': 85}
                 img_bytes_io = io.BytesIO()
                 img.save(img_bytes_io, format=output_format, **save_kwargs)
+                filesize = len(img_bytes_io.getbuffer())
                 thumb_bytes = img_bytes_io.getvalue()
                 cache.set(subpath, (thumb_bytes, output_mime))
@@ -258,13 +248,32 @@ def serve_file(subpath):
                 with open(full_path, 'rb') as f:
                     file_bytes = f.read()
                 cache.set(subpath, (file_bytes, mime))
-                response = send_file(io.BytesIO(file_bytes), mimetype=mime, conditional=True)
+                file_bytes_io = io.BytesIO(file_bytes)
+                filesize = len(file_bytes_io.getbuffer())
+                response = send_file(file_bytes_io, mimetype=mime, conditional=True)
             except Exception as e:
                 app.logger.error(f"Failed to read file {subpath}: {e}")
                 abort(500)
 
     # Set Cache-Control header (browser caching for 1 day)
     response.headers['Cache-Control'] = 'public, max-age=86400'
 
+    if mime and mime.startswith('audio/mpeg'):  # special rules for mp3 files
+        # HEAD requests come in to initiate server caching: ignore them and only log GET requests.
+        # Log the access if there is no Range header, or if the range starts at byte 0 and covers
+        # more than just the first-byte probe (i.e. not "bytes=0-1").
+        if request.method == 'GET' and (not range_header or (range_header.startswith("bytes=0-") and range_header != "bytes=0-1")):
+            logging = True
+        else:
+            logging = False
+    else:
+        logging = True
+
+    if logging:
+        threading.Thread(
+            target=a.log_file_access,
+            args=(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
+        ).start()
+
     return response
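
To make the new mp3 gating easier to follow, here is a hedged restatement of the same decision as a standalone helper (the helper name should_log is illustrative, not part of the commit):

def should_log(method: str, mime: str, range_header: str | None) -> bool:
    """Illustrative restatement of the logging rule added above.

    Non-mp3 files are always logged. For audio/mpeg, HEAD requests (server cache
    warm-up) are ignored, and ranged GETs are only logged when they start at byte 0
    and cover more than the probe range "bytes=0-1".
    """
    if not (mime and mime.startswith('audio/mpeg')):
        return True
    if method != 'GET':
        return False
    if not range_header:
        return True
    return range_header.startswith("bytes=0-") and range_header != "bytes=0-1"

# A few sample decisions:
assert should_log('GET', 'image/jpeg', None) is True          # non-mp3: always logged
assert should_log('HEAD', 'audio/mpeg', None) is False        # cache warm-up, ignored
assert should_log('GET', 'audio/mpeg', 'bytes=0-1') is False  # first-byte probe, ignored
assert should_log('GET', 'audio/mpeg', 'bytes=0-') is True    # full ranged request from 0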

@@ -1,7 +1,7 @@
 services:
   flask-app:
     image: python:3.11-slim
-    container_name: "${CONTAINER_NAME}"
+    container_name: "${CONTAINER_NAME}.web"
     restart: always
     working_dir: /app
     volumes:
@@ -19,8 +19,16 @@ services:
       - FLASK_ENV=production
       - TITLE_SHORT=${TITLE_SHORT}
       - TITLE_LONG=${TITLE_LONG}
+      - DB_HOST=postgres
+      - DB_PORT=5432
+      - DB_USER=${POSTGRES_USER}
+      - DB_PASSWORD=${POSTGRES_PASSWORD}
+      - DB_NAME=${POSTGRES_DB}
+    depends_on:
+      - postgres
     networks:
       - traefik
+      - internal
     labels:
       - "traefik.enable=true"
@@ -44,6 +52,21 @@ services:
       sh -c "pip install -r requirements.txt &&
             gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
 
+  postgres:
+    image: postgres:15
+    container_name: "${CONTAINER_NAME}.postgres"
+    restart: always
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB}
+    volumes:
+      - ./postgres_data:/var/lib/postgresql/data
+    networks:
+      - internal
+
 networks:
   traefik:
     external: true
+  internal:
+    internal: true
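
A quick way to check the wiring between the two services is a short script run inside the flask-app container; this is an illustrative sketch, assuming the DB_* variables defined above are present in the environment:

# Minimal connectivity check (illustrative; not part of the commit).
import os
import psycopg2

conn = psycopg2.connect(
    dbname=os.environ['DB_NAME'],          # POSTGRES_DB
    user=os.environ['DB_USER'],            # POSTGRES_USER
    password=os.environ['DB_PASSWORD'],    # POSTGRES_PASSWORD
    host=os.environ['DB_HOST'],            # the "postgres" service name on the internal network
    port=int(os.environ.get('DB_PORT', 5432)),
)
with conn.cursor() as cur:
    cur.execute('SELECT version()')
    print(cur.fetchone()[0])               # e.g. "PostgreSQL 15.x ..."
conn.close()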

@@ -6,3 +6,4 @@ diskcache
 geoip2
 gunicorn
 eventlet
+psycopg2-binary