back to sqlite

lelo 2025-03-31 22:06:32 +00:00
parent 4db37c49ff
commit 76eca80a4a
3 changed files with 220 additions and 199 deletions


@@ -1,37 +1,27 @@
import sqlite3
from flask import render_template, request
from datetime import datetime, timedelta
import geoip2.database
from auth import require_secret
import os
import psycopg2
file_access_temp = []
dbname = os.environ.get('DB_NAME')
user = os.environ.get('DB_USER')
password = os.environ.get('DB_PASSWORD')
host = os.environ.get('DB_HOST')
port = int(os.environ.get('DB_PORT', 5432))
# Name of the SQLite database file; change it to whatever you want:
DB_NAME = 'access_log.db'
connection = psycopg2.connect(dbname=dbname,
user=user,
password=password,
host=host,
port=port
)
# Enable autocommit
connection.autocommit = True
log_db = connection
# Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
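# Aside: a minimal sketch (hypothetical file name 'example.db') of the pattern
# used throughout this module -- using the sqlite3 connection as a context
# manager runs the enclosed statements in one transaction that commits on
# success and rolls back if an exception is raised; it does not close the
# connection.
#     conn = sqlite3.connect('example.db', check_same_thread=False)
#     with conn:
#         conn.execute('CREATE TABLE IF NOT EXISTS t (x INTEGER)')
#         conn.execute('INSERT INTO t (x) VALUES (?)', (1,))
#     conn.close()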
# Function to initialize the database.
def init_log_db():
with log_db.cursor() as cursor:
cursor.execute('''
"""Create the file_access_log table if it doesn't already exist."""
with log_db:
log_db.execute('''
CREATE TABLE IF NOT EXISTS file_access_log (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMP,
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT,
rel_path TEXT,
filesize BIGINT,
filesize INTEGER,
mime TEXT,
ip_address TEXT,
user_agent TEXT,
@@ -42,8 +32,6 @@ def init_log_db():
init_log_db()
def lookup_location(ip, reader):
try:
response = reader.city(ip)
@@ -54,7 +42,7 @@ def lookup_location(ip, reader):
return "Unknown", "Unknown"
def get_device_type(user_agent):
"Classify device type based on user agent string"
"""Classify device type based on user agent string."""
if 'Android' in user_agent:
return 'Android'
elif 'iPhone' in user_agent or 'iPad' in user_agent:
@@ -68,22 +56,30 @@ def get_device_type(user_agent):
else:
return 'Other'
# Logging function that uses the singleton connection.
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
"""Insert a file access record into the database."""
global file_access_temp
timestamp = datetime.now() # Use datetime object directly
with log_db.connection.cursor() as cursor:
cursor.execute('''
INSERT INTO file_access_log (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
''', (timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
file_access_temp.insert(0, [timestamp.isoformat(), rel_path, filesize, mime, ip_address, user_agent, device_id, cached])
return timestamp.isoformat()
timestamp = datetime.now() # a datetime object
# Store the ISO timestamp in the database for easy lexical comparison
iso_ts = timestamp.isoformat()
with log_db:
log_db.execute('''
INSERT INTO file_access_log
(timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached))
file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached])
return iso_ts
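# Aside: a small sketch of why ISO-8601 strings are safe for the
# "timestamp >= ?" comparisons in the dashboard queries below -- for this
# format, lexicographic order matches chronological order (dates are
# hypothetical):
#     a = datetime(2025, 3, 1, 8, 0).isoformat()    # '2025-03-01T08:00:00'
#     b = datetime(2025, 3, 31, 22, 6).isoformat()  # '2025-03-31T22:06:00'
#     assert a < b                    # string order agrees with time order
#     assert b[:10] == '2025-03-31'   # slicing the string yields the date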
def return_file_access():
"""Return recent file access logs from memory (the last 10 minutes)."""
global file_access_temp
if file_access_temp:
cutoff_time = datetime.now() - timedelta(minutes=10)
# Convert each stored timestamp (ISO string) back to datetime
file_access_temp[:] = [
entry for entry in file_access_temp
if datetime.fromisoformat(entry[0]) >= cutoff_time
@@ -105,186 +101,234 @@ def dashboard():
# Determine which file type we're filtering by.
filetype = 'other'
allowed_list = ['mp3', 'wav', 'audio']
if filetype_arg.lower() in allowed_list:
# Simple keyword lists that decide how we match the MIME type
audio_list = ['mp3', 'wav', 'audio']
image_list = ['jpg', 'jpeg', 'image', 'photo']
video_list = ['mp4', 'mov', 'wmv', 'avi']
if filetype_arg.lower() in audio_list:
filetype = 'audio/'
allowed_list = ['jpg', 'jpeg', 'image', 'photo']
if filetype_arg.lower() in allowed_list:
elif filetype_arg.lower() in image_list:
filetype = 'image/'
allowed_list = ['mp4', 'mov', 'wmv', 'avi']
if filetype_arg.lower() in allowed_list:
elif filetype_arg.lower() in video_list:
filetype = 'video/'
# Determine the start time based on timeframe.
# Determine start time based on timeframe
if timeframe == 'today':
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
elif timeframe == '7days':
start = now - timedelta(days=7)
start_dt = now - timedelta(days=7)
elif timeframe == '30days':
start = now - timedelta(days=30)
start_dt = now - timedelta(days=30)
elif timeframe == '365days':
start = now - timedelta(days=365)
start_dt = now - timedelta(days=365)
else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = now.replace(hour=0, minute=0, second=0, microsecond=0)
# Build the SQL filter for mime
# We'll compare the textual timestamp (ISO 8601).
start_str = start_dt.isoformat()
# Build the SQL filter
if filetype == 'other':
# Exclude audio, image, and video mimes
filetype_filter_sql = "AND mime NOT LIKE 'audio/%' AND mime NOT LIKE 'image/%' AND mime NOT LIKE 'video/%'"
params = (start,)
# Exclude audio, image, video
filetype_filter_sql = (
"AND mime NOT LIKE 'audio/%' "
"AND mime NOT LIKE 'image/%' "
"AND mime NOT LIKE 'video/%' "
)
params_for_filter = (start_str,)
else:
# Filter for mimes that start with the given type.
filetype_filter_sql = "AND mime LIKE %s"
params = (start, filetype + '%')
# Filter for mimes that start with the given type
filetype_filter_sql = "AND mime LIKE ?"
params_for_filter = (start_str, filetype + '%')
with log_db.connection.cursor() as cursor:
# Raw file access counts (top files)
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
'''
cursor.execute(query, params)
# 1. Top files by access count
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
rows = cursor.fetchall()
# Daily access trend for a line chart
query = f'''
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY CAST(timestamp AS DATE)
ORDER BY date
'''
cursor.execute(query, params)
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
# 2. Daily access trend (line chart)
# We'll group by day using substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT substr(timestamp, 1, 10) AS date, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY date
ORDER BY date
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
daily_rows = cursor.fetchall()
daily_access_data = [
dict(date=r[0], count=r[1]) for r in daily_rows
]
# Aggregate download counts by time bucket according to the timeframe.
if timeframe == 'today':
query = f'''
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
elif timeframe in ('7days', '30days'):
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
elif timeframe == '365days':
query = f'''
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
else:
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
# User agent distribution (aggregate by device type)
# 3. Timeframe-based aggregation
# We'll group by hour if "today", by day if "7days"/"30days", by month if "365days".
if timeframe == 'today':
# Hour: substr(timestamp, 12, 2) -> HH
query = f'''
SELECT user_agent, COUNT(*) as count
SELECT substr(timestamp, 12, 2) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {}
for entry in raw_user_agents:
device = get_device_type(entry['user_agent'])
device_counts[device] = device_counts.get(device, 0) + entry['count']
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
# Parent folder distribution
elif timeframe in ('7days', '30days'):
# Day: substr(timestamp, 1, 10) -> YYYY-MM-DD
query = f'''
SELECT rel_path, COUNT(*) as count
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
folder_data = {}
for row in cursor.fetchall():
rel_path = row[0]
parent_folder = rel_path.rsplit('/', 1)[0] if '/' in rel_path else "Root"
folder_data[parent_folder] = folder_data.get(parent_folder, 0) + row[1]
folder_data = [dict(folder=folder, count=count) for folder, count in folder_data.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]
# Aggregate IP addresses with counts
elif timeframe == '365days':
# Month: substr(timestamp, 1, 7) -> YYYY-MM
query = f'''
SELECT ip_address, COUNT(*) as count
SELECT substr(timestamp, 1, 7) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
cursor.execute(query, params)
else:
# Default: group by day
query = f'''
SELECT substr(timestamp, 1, 10) AS bucket, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
timeframe_data_rows = cursor.fetchall()
timeframe_data = [
dict(bucket=r[0], count=r[1]) for r in timeframe_data_rows
]
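# Aside: how the substr() bucketing above maps onto an ISO-8601 timestamp such
# as '2025-03-31T22:06:32' (SQLite substr() is 1-indexed):
#     substr(timestamp, 1, 10) -> '2025-03-31'  (day bucket,   ts[0:10] in Python)
#     substr(timestamp, 12, 2) -> '22'          (hour bucket,  ts[11:13])
#     substr(timestamp, 1, 7)  -> '2025-03'     (month bucket, ts[0:7])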
# 4. User agent distribution
query = f'''
SELECT user_agent, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
raw_user_agents = cursor.fetchall()
device_counts = {}
for (ua, cnt) in raw_user_agents:
device = get_device_type(ua)
device_counts[device] = device_counts.get(device, 0) + cnt
user_agent_data = [
dict(device=d, count=c) for d, c in device_counts.items()
]
# 5. Parent folder distribution
query = f'''
SELECT rel_path, COUNT(*) AS count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
'''
folder_data_dict = {}
with log_db:
cursor = log_db.execute(query, params_for_filter)
for (rp, c) in cursor.fetchall():
if '/' in rp:
parent_folder = rp.rsplit('/', 1)[0]
else:
parent_folder = "Root"
folder_data_dict[parent_folder] = folder_data_dict.get(parent_folder, 0) + c
folder_data = [dict(folder=f, count=cnt) for f, cnt in folder_data_dict.items()]
folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10]
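# Aside: the parent-folder grouping above hinges on rsplit with maxsplit=1;
# for example (hypothetical paths):
#     'albums/2024/track01.mp3'.rsplit('/', 1)[0]  -> 'albums/2024'
#     'readme.txt'                                 -> no '/', counted as "Root"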
# 6. Aggregate IP addresses with counts
query = f'''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
ip_rows = cursor.fetchall()
# Summary stats using separate SQL queries
query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# 7. Summary stats
# total_accesses
query = f'''
SELECT COUNT(*)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
total_accesses = cursor.fetchone()[0]
query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# unique_files
query = f'''
SELECT COUNT(DISTINCT rel_path)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_files = cursor.fetchone()[0]
query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
# unique_user
query = f'''
SELECT COUNT(DISTINCT device_id)
FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql}
'''
with log_db:
cursor = log_db.execute(query, params_for_filter)
unique_user = cursor.fetchone()[0]
# Process location data with GeoIP2.
# 8. Process location data with GeoIP2
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data = {}
for ip, count in ip_rows:
country, city = lookup_location(ip, reader)
location_data_dict = {}
for (ip_addr, cnt) in ip_rows:
country, city = lookup_location(ip_addr, reader)
key = (country, city)
location_data[key] = location_data.get(key, 0) + count
location_data_dict[key] = location_data_dict.get(key, 0) + cnt
reader.close()
location_data = [dict(country=key[0], city=key[1], count=value) for key, value in location_data.items()]
location_data = [
dict(country=k[0], city=k[1], count=v)
for k, v in location_data_dict.items()
]
location_data.sort(key=lambda x: x['count'], reverse=True)
location_data = location_data[:20]
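# Aside: a hedged sketch of the per-IP lookup feeding location_data, using the
# geoip2 reader opened above (the IP below is a documentation placeholder):
#     import geoip2.errors
#     with geoip2.database.Reader('GeoLite2-City.mmdb') as reader:
#         try:
#             resp = reader.city('203.0.113.7')
#             country, city = resp.country.name, resp.city.name
#         except geoip2.errors.AddressNotFoundError:
#             country, city = 'Unknown', 'Unknown'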
return render_template("dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
user_agent_data=user_agent_data,
folder_data=folder_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data)
# Convert the top-files rows to a list of dictionaries
# (just for consistency in passing to template).
rows = [dict(rel_path=r[0], access_count=r[1]) for r in rows]
return render_template(
"dashboard.html",
timeframe=timeframe,
rows=rows,
daily_access_data=daily_access_data,
user_agent_data=user_agent_data,
folder_data=folder_data,
location_data=location_data,
total_accesses=total_accesses,
unique_files=unique_files,
unique_user=unique_user,
timeframe_data=timeframe_data
)


@@ -19,15 +19,8 @@ services:
- FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG}
- DB_HOST=postgres-db
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_NAME=${DB_NAME}
depends_on:
- "postgres"
networks:
- traefik
- internal
labels:
- "traefik.enable=true"
@@ -51,22 +44,7 @@ services:
sh -c "pip install -r requirements.txt &&
gunicorn --worker-class eventlet -w 1 -b 0.0.0.0:5000 app:app"
postgres:
image: postgres:17
restart: always
environment:
POSTGRES_USER: ${DB_USER:?}
POSTGRES_PASSWORD: ${DB_PASSWORD:?}
POSTGRES_DB: ${DB_NAME:?}
volumes:
- ./postgres_data:/var/lib/postgresql/data
networks:
internal:
aliases:
- postgres-db
networks:
traefik:
external: true
internal:
internal: true


@@ -6,4 +6,3 @@ diskcache
geoip2
gunicorn
eventlet
psycopg2-binary