filetype filtering in db

This commit is contained in:
lelo 2025-03-31 20:12:11 +00:00
parent 928fdb8901
commit 80220e1250
2 changed files with 90 additions and 54 deletions

View File

@ -1,10 +1,8 @@
from flask import render_template, request, session from flask import render_template, request
from datetime import datetime, timedelta from datetime import datetime, timedelta
import geoip2.database import geoip2.database
from urllib.parse import urlparse, unquote
from auth import require_secret from auth import require_secret
import os import os
import threading
import psycopg2 import psycopg2
file_access_temp = [] file_access_temp = []
@ -26,12 +24,13 @@ class Database(metaclass=SingletonMeta):
self.password = os.environ.get('DB_PASSWORD') self.password = os.environ.get('DB_PASSWORD')
self.host = os.environ.get('DB_HOST') self.host = os.environ.get('DB_HOST')
self.port = int(os.environ.get('DB_PORT', 5432)) self.port = int(os.environ.get('DB_PORT', 5432))
self.connection = psycopg2.connect(dbname=self.dbname, self.connection = psycopg2.connect(dbname=self.dbname,
user=self.user, user=self.user,
password=self.password, password=self.password,
host=self.host, host=self.host,
port=self.port) port=self.port)
# Enable autocommit so we don't have to call commit() after every transaction. # Enable autocommit
self.connection.autocommit = True self.connection.autocommit = True
self.init_log_db() self.init_log_db()
@ -53,11 +52,8 @@ class Database(metaclass=SingletonMeta):
) )
''') ''')
try:
# Create a global database instance.
log_db = Database() log_db = Database()
except:
print("No access to database. No logs available!!!")
def lookup_location(ip, reader): def lookup_location(ip, reader):
try: try:
@ -111,9 +107,28 @@ def connections():
@require_secret @require_secret
def dashboard(): def dashboard():
filetype_arg = request.args.get('filetype', 'audio')
timeframe = request.args.get('timeframe', 'today') timeframe = request.args.get('timeframe', 'today')
now = datetime.now() now = datetime.now()
# Determine which file type we're filtering by.
filetype = 'other'
allowed_list = ['mp3', 'wav', 'audio']
if filetype_arg.lower() in allowed_list:
filetype = 'audio/'
allowed_list = ['jpg', 'jpeg', 'image', 'photo']
if filetype_arg.lower() in allowed_list:
filetype = 'image/'
allowed_list = ['mp4', 'mov', 'wmv', 'avi']
if filetype_arg.lower() in allowed_list:
filetype = 'video/'
# Determine the start time based on timeframe.
if timeframe == 'today': if timeframe == 'today':
start = now.replace(hour=0, minute=0, second=0, microsecond=0) start = now.replace(hour=0, minute=0, second=0, microsecond=0)
elif timeframe == '7days': elif timeframe == '7days':
@ -125,75 +140,88 @@ def dashboard():
else: else:
start = now.replace(hour=0, minute=0, second=0, microsecond=0) start = now.replace(hour=0, minute=0, second=0, microsecond=0)
# Build the SQL filter for mime
if filetype == 'other':
# Exclude audio, image, and video mimes
filetype_filter_sql = "AND mime NOT LIKE 'audio/%' AND mime NOT LIKE 'image/%' AND mime NOT LIKE 'video/%'"
params = (start,)
else:
# Filter for mimes that start with the given type.
filetype_filter_sql = "AND mime LIKE %s"
params = (start, filetype + '%')
with log_db.connection.cursor() as cursor: with log_db.connection.cursor() as cursor:
# Raw file access counts for the table (top files) # Raw file access counts (top files)
cursor.execute(''' query = f'''
SELECT rel_path, COUNT(*) as access_count SELECT rel_path, COUNT(*) as access_count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path GROUP BY rel_path
ORDER BY access_count DESC ORDER BY access_count DESC
LIMIT 20 LIMIT 20
''', (start,)) '''
cursor.execute(query, params)
rows = cursor.fetchall() rows = cursor.fetchall()
# Daily access trend for a line chart # Daily access trend for a line chart
cursor.execute(''' query = f'''
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY CAST(timestamp AS DATE) GROUP BY CAST(timestamp AS DATE)
ORDER BY date ORDER BY date
''', (start,)) '''
cursor.execute(query, params)
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()] daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
# Aggregate download counts by time bucket according to the timeframe. # Aggregate download counts by time bucket according to the timeframe.
if timeframe == 'today': if timeframe == 'today':
# Group by hour using to_char query = f'''
cursor.execute('''
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket GROUP BY bucket
ORDER BY bucket ORDER BY bucket
''', (start,)) '''
cursor.execute(query, params)
elif timeframe in ('7days', '30days'): elif timeframe in ('7days', '30days'):
# Group by day query = f'''
cursor.execute('''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket GROUP BY bucket
ORDER BY bucket ORDER BY bucket
''', (start,)) '''
cursor.execute(query, params)
elif timeframe == '365days': elif timeframe == '365days':
# Group by month using to_char query = f'''
cursor.execute('''
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket GROUP BY bucket
ORDER BY bucket ORDER BY bucket
''', (start,)) '''
cursor.execute(query, params)
else: else:
# Fallback: group by day query = f'''
cursor.execute('''
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY bucket GROUP BY bucket
ORDER BY bucket ORDER BY bucket
''', (start,)) '''
cursor.execute(query, params)
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()] timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
# User agent distribution (aggregate by device type) # User agent distribution (aggregate by device type)
cursor.execute(''' query = f'''
SELECT user_agent, COUNT(*) as count SELECT user_agent, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY user_agent GROUP BY user_agent
ORDER BY count DESC ORDER BY count DESC
''', (start,)) '''
cursor.execute(query, params)
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()] raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
device_counts = {} device_counts = {}
for entry in raw_user_agents: for entry in raw_user_agents:
@ -202,13 +230,14 @@ def dashboard():
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()] user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
# Parent folder distribution # Parent folder distribution
cursor.execute(''' query = f'''
SELECT rel_path, COUNT(*) as count SELECT rel_path, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY rel_path GROUP BY rel_path
ORDER BY count DESC ORDER BY count DESC
''', (start,)) '''
cursor.execute(query, params)
folder_data = {} folder_data = {}
for row in cursor.fetchall(): for row in cursor.fetchall():
rel_path = row[0] rel_path = row[0]
@ -219,23 +248,27 @@ def dashboard():
folder_data = folder_data[:10] folder_data = folder_data[:10]
# Aggregate IP addresses with counts # Aggregate IP addresses with counts
cursor.execute(''' query = f'''
SELECT ip_address, COUNT(*) as count SELECT ip_address, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= %s WHERE timestamp >= %s {filetype_filter_sql}
GROUP BY ip_address GROUP BY ip_address
ORDER BY count DESC ORDER BY count DESC
''', (start,)) '''
cursor.execute(query, params)
ip_rows = cursor.fetchall() ip_rows = cursor.fetchall()
# Summary stats using separate SQL queries # Summary stats using separate SQL queries
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,)) query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
total_accesses = cursor.fetchone()[0] total_accesses = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,)) query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
unique_files = cursor.fetchone()[0] unique_files = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,)) query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
cursor.execute(query, params)
unique_user = cursor.fetchone()[0] unique_user = cursor.fetchone()[0]
# Process location data with GeoIP2. # Process location data with GeoIP2.
@ -263,3 +296,4 @@ def dashboard():
unique_user=unique_user, unique_user=unique_user,
timeframe_data=timeframe_data) timeframe_data=timeframe_data)

View File

@ -19,13 +19,13 @@ services:
- FLASK_ENV=production - FLASK_ENV=production
- TITLE_SHORT=${TITLE_SHORT} - TITLE_SHORT=${TITLE_SHORT}
- TITLE_LONG=${TITLE_LONG} - TITLE_LONG=${TITLE_LONG}
- DB_HOST=${CONTAINER_NAME}.sql - DB_HOST=postgres-db
- DB_PORT=5432 - DB_PORT=5432
- DB_USER=${POSTGRES_USER} - DB_USER=${DB_USER}
- DB_PASSWORD=${POSTGRES_PASSWORD} - DB_PASSWORD=${DB_PASSWORD}
- DB_NAME=${POSTGRES_DB} - DB_NAME=${DB_NAME}
depends_on: depends_on:
- postgres - "postgres"
networks: networks:
- traefik - traefik
- internal - internal
@ -54,16 +54,18 @@ services:
postgres: postgres:
image: postgres:15 image: postgres:15
container_name: "${CONTAINER_NAME}.sql" container_name: "${CONTAINER_NAME}-db"
restart: always restart: always
environment: environment:
POSTGRES_USER: ${POSTGRES_USER} POSTGRES_USER: ${DB_USER:?}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_PASSWORD: ${DB_PASSWORD:?}
POSTGRES_DB: ${POSTGRES_DB} POSTGRES_DB: ${DB_NAME:?}
volumes: volumes:
- ./postgres_data:/var/lib/postgresql/data - ./postgres_data:/var/lib/postgresql/data
networks: networks:
- internal internal:
aliases:
- postgres-db
networks: networks:
traefik: traefik: