filetype filtering in db
This commit is contained in:
parent
928fdb8901
commit
80220e1250
122
analytics.py
122
analytics.py
@ -1,10 +1,8 @@
|
|||||||
from flask import render_template, request, session
|
from flask import render_template, request
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import geoip2.database
|
import geoip2.database
|
||||||
from urllib.parse import urlparse, unquote
|
|
||||||
from auth import require_secret
|
from auth import require_secret
|
||||||
import os
|
import os
|
||||||
import threading
|
|
||||||
import psycopg2
|
import psycopg2
|
||||||
|
|
||||||
file_access_temp = []
|
file_access_temp = []
|
||||||
@ -26,12 +24,13 @@ class Database(metaclass=SingletonMeta):
|
|||||||
self.password = os.environ.get('DB_PASSWORD')
|
self.password = os.environ.get('DB_PASSWORD')
|
||||||
self.host = os.environ.get('DB_HOST')
|
self.host = os.environ.get('DB_HOST')
|
||||||
self.port = int(os.environ.get('DB_PORT', 5432))
|
self.port = int(os.environ.get('DB_PORT', 5432))
|
||||||
|
|
||||||
self.connection = psycopg2.connect(dbname=self.dbname,
|
self.connection = psycopg2.connect(dbname=self.dbname,
|
||||||
user=self.user,
|
user=self.user,
|
||||||
password=self.password,
|
password=self.password,
|
||||||
host=self.host,
|
host=self.host,
|
||||||
port=self.port)
|
port=self.port)
|
||||||
# Enable autocommit so we don't have to call commit() after every transaction.
|
# Enable autocommit
|
||||||
self.connection.autocommit = True
|
self.connection.autocommit = True
|
||||||
|
|
||||||
self.init_log_db()
|
self.init_log_db()
|
||||||
@ -52,12 +51,9 @@ class Database(metaclass=SingletonMeta):
|
|||||||
cached BOOLEAN
|
cached BOOLEAN
|
||||||
)
|
)
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
log_db = Database()
|
||||||
|
|
||||||
try:
|
|
||||||
# Create a global database instance.
|
|
||||||
log_db = Database()
|
|
||||||
except:
|
|
||||||
print("No access to database. No logs available!!!")
|
|
||||||
|
|
||||||
def lookup_location(ip, reader):
|
def lookup_location(ip, reader):
|
||||||
try:
|
try:
|
||||||
@ -111,9 +107,28 @@ def connections():
|
|||||||
|
|
||||||
@require_secret
|
@require_secret
|
||||||
def dashboard():
|
def dashboard():
|
||||||
|
filetype_arg = request.args.get('filetype', 'audio')
|
||||||
timeframe = request.args.get('timeframe', 'today')
|
timeframe = request.args.get('timeframe', 'today')
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
|
|
||||||
|
# Determine which file type we're filtering by.
|
||||||
|
filetype = 'other'
|
||||||
|
|
||||||
|
allowed_list = ['mp3', 'wav', 'audio']
|
||||||
|
if filetype_arg.lower() in allowed_list:
|
||||||
|
filetype = 'audio/'
|
||||||
|
|
||||||
|
allowed_list = ['jpg', 'jpeg', 'image', 'photo']
|
||||||
|
if filetype_arg.lower() in allowed_list:
|
||||||
|
filetype = 'image/'
|
||||||
|
|
||||||
|
allowed_list = ['mp4', 'mov', 'wmv', 'avi']
|
||||||
|
if filetype_arg.lower() in allowed_list:
|
||||||
|
filetype = 'video/'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Determine the start time based on timeframe.
|
||||||
if timeframe == 'today':
|
if timeframe == 'today':
|
||||||
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
elif timeframe == '7days':
|
elif timeframe == '7days':
|
||||||
@ -125,75 +140,88 @@ def dashboard():
|
|||||||
else:
|
else:
|
||||||
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
|
# Build the SQL filter for mime
|
||||||
|
if filetype == 'other':
|
||||||
|
# Exclude audio, image, and video mimes
|
||||||
|
filetype_filter_sql = "AND mime NOT LIKE 'audio/%' AND mime NOT LIKE 'image/%' AND mime NOT LIKE 'video/%'"
|
||||||
|
params = (start,)
|
||||||
|
else:
|
||||||
|
# Filter for mimes that start with the given type.
|
||||||
|
filetype_filter_sql = "AND mime LIKE %s"
|
||||||
|
params = (start, filetype + '%')
|
||||||
|
|
||||||
with log_db.connection.cursor() as cursor:
|
with log_db.connection.cursor() as cursor:
|
||||||
# Raw file access counts for the table (top files)
|
# Raw file access counts (top files)
|
||||||
cursor.execute('''
|
query = f'''
|
||||||
SELECT rel_path, COUNT(*) as access_count
|
SELECT rel_path, COUNT(*) as access_count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY rel_path
|
GROUP BY rel_path
|
||||||
ORDER BY access_count DESC
|
ORDER BY access_count DESC
|
||||||
LIMIT 20
|
LIMIT 20
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
rows = cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
|
|
||||||
# Daily access trend for a line chart
|
# Daily access trend for a line chart
|
||||||
cursor.execute('''
|
query = f'''
|
||||||
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
|
SELECT CAST(timestamp AS DATE) as date, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY CAST(timestamp AS DATE)
|
GROUP BY CAST(timestamp AS DATE)
|
||||||
ORDER BY date
|
ORDER BY date
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
|
daily_access_data = [dict(date=str(row[0]), count=row[1]) for row in cursor.fetchall()]
|
||||||
|
|
||||||
# Aggregate download counts by time bucket according to the timeframe.
|
# Aggregate download counts by time bucket according to the timeframe.
|
||||||
if timeframe == 'today':
|
if timeframe == 'today':
|
||||||
# Group by hour using to_char
|
query = f'''
|
||||||
cursor.execute('''
|
|
||||||
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
|
SELECT to_char(timestamp, 'HH24') as bucket, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY bucket
|
GROUP BY bucket
|
||||||
ORDER BY bucket
|
ORDER BY bucket
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
elif timeframe in ('7days', '30days'):
|
elif timeframe in ('7days', '30days'):
|
||||||
# Group by day
|
query = f'''
|
||||||
cursor.execute('''
|
|
||||||
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
|
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY bucket
|
GROUP BY bucket
|
||||||
ORDER BY bucket
|
ORDER BY bucket
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
elif timeframe == '365days':
|
elif timeframe == '365days':
|
||||||
# Group by month using to_char
|
query = f'''
|
||||||
cursor.execute('''
|
|
||||||
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
|
SELECT to_char(timestamp, 'YYYY-MM') as bucket, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY bucket
|
GROUP BY bucket
|
||||||
ORDER BY bucket
|
ORDER BY bucket
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
else:
|
else:
|
||||||
# Fallback: group by day
|
query = f'''
|
||||||
cursor.execute('''
|
|
||||||
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
|
SELECT CAST(timestamp AS DATE) as bucket, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY bucket
|
GROUP BY bucket
|
||||||
ORDER BY bucket
|
ORDER BY bucket
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
|
timeframe_data = [dict(bucket=row[0], count=row[1]) for row in cursor.fetchall()]
|
||||||
|
|
||||||
# User agent distribution (aggregate by device type)
|
# User agent distribution (aggregate by device type)
|
||||||
cursor.execute('''
|
query = f'''
|
||||||
SELECT user_agent, COUNT(*) as count
|
SELECT user_agent, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY user_agent
|
GROUP BY user_agent
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
|
raw_user_agents = [dict(user_agent=row[0], count=row[1]) for row in cursor.fetchall()]
|
||||||
device_counts = {}
|
device_counts = {}
|
||||||
for entry in raw_user_agents:
|
for entry in raw_user_agents:
|
||||||
@ -202,13 +230,14 @@ def dashboard():
|
|||||||
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
|
user_agent_data = [dict(device=device, count=count) for device, count in device_counts.items()]
|
||||||
|
|
||||||
# Parent folder distribution
|
# Parent folder distribution
|
||||||
cursor.execute('''
|
query = f'''
|
||||||
SELECT rel_path, COUNT(*) as count
|
SELECT rel_path, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY rel_path
|
GROUP BY rel_path
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
folder_data = {}
|
folder_data = {}
|
||||||
for row in cursor.fetchall():
|
for row in cursor.fetchall():
|
||||||
rel_path = row[0]
|
rel_path = row[0]
|
||||||
@ -219,23 +248,27 @@ def dashboard():
|
|||||||
folder_data = folder_data[:10]
|
folder_data = folder_data[:10]
|
||||||
|
|
||||||
# Aggregate IP addresses with counts
|
# Aggregate IP addresses with counts
|
||||||
cursor.execute('''
|
query = f'''
|
||||||
SELECT ip_address, COUNT(*) as count
|
SELECT ip_address, COUNT(*) as count
|
||||||
FROM file_access_log
|
FROM file_access_log
|
||||||
WHERE timestamp >= %s
|
WHERE timestamp >= %s {filetype_filter_sql}
|
||||||
GROUP BY ip_address
|
GROUP BY ip_address
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
''', (start,))
|
'''
|
||||||
|
cursor.execute(query, params)
|
||||||
ip_rows = cursor.fetchall()
|
ip_rows = cursor.fetchall()
|
||||||
|
|
||||||
# Summary stats using separate SQL queries
|
# Summary stats using separate SQL queries
|
||||||
cursor.execute('SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s', (start,))
|
query = f'SELECT COUNT(*) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
|
||||||
|
cursor.execute(query, params)
|
||||||
total_accesses = cursor.fetchone()[0]
|
total_accesses = cursor.fetchone()[0]
|
||||||
|
|
||||||
cursor.execute('SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s', (start,))
|
query = f'SELECT COUNT(DISTINCT rel_path) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
|
||||||
|
cursor.execute(query, params)
|
||||||
unique_files = cursor.fetchone()[0]
|
unique_files = cursor.fetchone()[0]
|
||||||
|
|
||||||
cursor.execute('SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s', (start,))
|
query = f'SELECT COUNT(DISTINCT device_id) FROM file_access_log WHERE timestamp >= %s {filetype_filter_sql}'
|
||||||
|
cursor.execute(query, params)
|
||||||
unique_user = cursor.fetchone()[0]
|
unique_user = cursor.fetchone()[0]
|
||||||
|
|
||||||
# Process location data with GeoIP2.
|
# Process location data with GeoIP2.
|
||||||
@ -263,3 +296,4 @@ def dashboard():
|
|||||||
unique_user=unique_user,
|
unique_user=unique_user,
|
||||||
timeframe_data=timeframe_data)
|
timeframe_data=timeframe_data)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -19,13 +19,13 @@ services:
|
|||||||
- FLASK_ENV=production
|
- FLASK_ENV=production
|
||||||
- TITLE_SHORT=${TITLE_SHORT}
|
- TITLE_SHORT=${TITLE_SHORT}
|
||||||
- TITLE_LONG=${TITLE_LONG}
|
- TITLE_LONG=${TITLE_LONG}
|
||||||
- DB_HOST=${CONTAINER_NAME}.sql
|
- DB_HOST=postgres-db
|
||||||
- DB_PORT=5432
|
- DB_PORT=5432
|
||||||
- DB_USER=${POSTGRES_USER}
|
- DB_USER=${DB_USER}
|
||||||
- DB_PASSWORD=${POSTGRES_PASSWORD}
|
- DB_PASSWORD=${DB_PASSWORD}
|
||||||
- DB_NAME=${POSTGRES_DB}
|
- DB_NAME=${DB_NAME}
|
||||||
depends_on:
|
depends_on:
|
||||||
- postgres
|
- "postgres"
|
||||||
networks:
|
networks:
|
||||||
- traefik
|
- traefik
|
||||||
- internal
|
- internal
|
||||||
@ -54,16 +54,18 @@ services:
|
|||||||
|
|
||||||
postgres:
|
postgres:
|
||||||
image: postgres:15
|
image: postgres:15
|
||||||
container_name: "${CONTAINER_NAME}.sql"
|
container_name: "${CONTAINER_NAME}-db"
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: ${POSTGRES_USER}
|
POSTGRES_USER: ${DB_USER:?}
|
||||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
POSTGRES_PASSWORD: ${DB_PASSWORD:?}
|
||||||
POSTGRES_DB: ${POSTGRES_DB}
|
POSTGRES_DB: ${DB_NAME:?}
|
||||||
volumes:
|
volumes:
|
||||||
- ./postgres_data:/var/lib/postgresql/data
|
- ./postgres_data:/var/lib/postgresql/data
|
||||||
networks:
|
networks:
|
||||||
- internal
|
internal:
|
||||||
|
aliases:
|
||||||
|
- postgres-db
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
traefik:
|
traefik:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user