filetype filtering in db

This commit is contained in:
lelo 2025-03-31 20:12:11 +00:00
parent 928fdb8901
commit 80220e1250
2 changed files with 90 additions and 54 deletions

View File

@ -1,10 +1,8 @@
from flask import render_template, request, session
from flask import render_template, request
from datetime import datetime, timedelta
import geoip2.database
from urllib.parse import urlparse, unquote
from auth import require_secret
import os
import threading
import psycopg2
file_access_temp = []
@ -26,12 +24,13 @@ class Database(metaclass=SingletonMeta):
self.password = os.environ.get('DB_PASSWORD')
self.host = os.environ.get('DB_HOST')
self.port = int(os.environ.get('DB_PORT', 5432))
self.connection = psycopg2.connect(dbname=self.dbname,
user=self.user,
password=self.password,
host=self.host,
port=self.port)
# Enable autocommit so we don't have to call commit() after every transaction.
# Enable autocommit
self.connection.autocommit = True
self.init_log_db()
@ -53,11 +52,8 @@ class Database(metaclass=SingletonMeta):
)
''')
try:
    # Create a global database instance.
    log_db = Database()
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate during start-up.
    # Bind the name to None so later code can test `log_db is None`
    # instead of hitting a NameError on an unbound global.
    log_db = None
    print("No access to database. No logs available!!!")
    # Report the underlying reason; otherwise a misconfigured DB_* variable
    # is indistinguishable from an unreachable server.
    print(f"Database error: {exc!r}")
def lookup_location(ip, reader):
try:
@ -111,9 +107,28 @@ def connections():
@require_secret
def dashboard():
filetype_arg = request.args.get('filetype', 'audio')
timeframe = request.args.get('timeframe', 'today')
now = datetime.now()
# Determine which file type we're filtering by.
# Each accepted `filetype` query keyword maps to the MIME prefix used in the
# SQL filter; anything unrecognised falls back to 'other'.
# 'video' is accepted as a category name, matching how 'audio' and 'image'
# are accepted for their categories (it was previously missing, so
# ?filetype=video was treated as 'other').
mime_prefix_by_keyword = {
    'mp3': 'audio/', 'wav': 'audio/', 'audio': 'audio/',
    'jpg': 'image/', 'jpeg': 'image/', 'image': 'image/', 'photo': 'image/',
    'mp4': 'video/', 'mov': 'video/', 'wmv': 'video/', 'avi': 'video/',
    'video': 'video/',
}
filetype = mime_prefix_by_keyword.get(filetype_arg.lower(), 'other')
# Determine the start time based on timeframe.
if timeframe == 'today':
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
elif timeframe == '7days':
@ -125,75 +140,88 @@ def dashboard():
else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
# Build the SQL filter for mime.
# The fragment is appended directly after "timestamp >= %s" in every query
# below, so `params` must list `start` first, followed by the LIKE patterns
# in the order their placeholders appear.
# The patterns are passed as bound parameters rather than written inline:
# when a query is executed with parameters, psycopg2 treats every '%' in the
# SQL text as a placeholder marker, so an inline literal such as 'audio/%'
# breaks the statement unless it is escaped as '%%'.
if filetype == 'other':
    # Exclude audio, image, and video mimes.
    # NOTE(review): rows whose mime is NULL satisfy neither LIKE nor NOT LIKE,
    # so they are not counted under any filetype - confirm whether the mime
    # column can be NULL and whether such rows belong in 'other'.
    filetype_filter_sql = "AND mime NOT LIKE %s AND mime NOT LIKE %s AND mime NOT LIKE %s"
    params = (start, 'audio/%', 'image/%', 'video/%')
else:
    # Filter for mimes that start with the given type.
    filetype_filter_sql = "AND mime LIKE %s"
    params = (start, filetype + '%')
with log_db.connection.cursor() as cursor:
# Raw file access counts for the table (top files)
cursor.execute('''
# Raw file access counts (top files)
query = f'''
SELECT rel_path, COUNT(*) as access_count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY access_count DESC
LIMIT 20
''', (start,))
'''
cursor.execute(query, params)
rows = cursor.fetchall()
# Daily access trend for a line chart
cursor.execute('''
query = f'''
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY CAST(timestamp AS DATE)
ORDER BY date
''', (start,))
'''
cursor.execute(query, params)
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
# Aggregate download counts by time bucket according to the timeframe.
if timeframe == 'today':
# Group by hour using to_char
cursor.execute('''
query = f'''
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
''', (start,))
'''
cursor.execute(query, params)
elif timeframe in ('7days', '30days'):
# Group by day
cursor.execute('''
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
''', (start,))
'''
cursor.execute(query, params)
elif timeframe == '365days':
# Group by month using to_char
cursor.execute('''
query = f'''
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
''', (start,))
'''
cursor.execute(query, params)
else:
# Fallback: group by day
cursor.execute('''
query = f'''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket
ORDER BY bucket
''', (start,))
'''
cursor.execute(query, params)
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
# User agent distribution (aggregate by device type)
cursor.execute('''
query = f'''
SELECT user_agent, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY user_agent
ORDER BY count DESC
''', (start,))
'''
cursor.execute(query, params)
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {}
for entry in raw_user_agents:
@ -202,13 +230,14 @@ def dashboard():
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
# Parent folder distribution
cursor.execute('''
query = f'''
SELECT rel_path, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path
ORDER BY count DESC
''', (start,))
'''
cursor.execute(query, params)
folder_data = {}
for row in cursor.fetchall():
rel_path = row[0]
@ -219,23 +248,27 @@ def dashboard():
folder_data = folder_data[:10]
# Aggregate IP addresses with counts
cursor.execute('''
query = f'''
SELECT ip_address, COUNT(*) as count
FROM file_access_log
WHERE timestamp >= %s
WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY ip_address
ORDER BY count DESC
''', (start,))
'''
cursor.execute(query, params)
ip_rows = cursor.fetchall()
# Summary stats using separate SQL queries
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
total_accesses = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
unique_files = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
unique_user = cursor.fetchone()[0]
# Process location data with GeoIP2.
@ -263,3 +296,4 @@ def dashboard():
unique_user=unique_user,
timeframe_data=timeframe_data)

View File

@ -19,13 +19,13 @@ services:
- FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG}
- DB_HOST=${CONTAINER_NAME}.sql
- DB_HOST=postgres-db
- DB_PORT=5432
- DB_USER=${POSTGRES_USER}
- DB_PASSWORD=${POSTGRES_PASSWORD}
- DB_NAME=${POSTGRES_DB}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_NAME=${DB_NAME}
depends_on:
- postgres
- "postgres"
networks:
- traefik
- internal
@ -54,16 +54,18 @@ services:
postgres:
image: postgres:15
container_name: "${CONTAINER_NAME}.sql"
container_name: "${CONTAINER_NAME}-db"
restart: always
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: ${POSTGRES_DB}
POSTGRES_USER: ${DB_USER:?}
POSTGRES_PASSWORD: ${DB_PASSWORD:?}
POSTGRES_DB: ${DB_NAME:?}
volumes:
- ./postgres_data:/var/lib/postgresql/data
networks:
- internal
internal:
aliases:
- postgres-db
networks:
traefik: