exclude tiny range requests from logging

This commit is contained in:
lelo 2025-12-25 22:17:50 +00:00
parent 311fd89a54
commit 495f33aa68
2 changed files with 61 additions and 8 deletions

View File

@ -114,10 +114,13 @@ def parse_timestamp(ts_str):
# If it's some other ValueError, re-raise it.
raise
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached):
def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached, method="GET"):
"""Insert a file access record into the database and prune entries older than 10 minutes,
and track todays files separately in folder_today."""
and track todays files separately in folder_today. HTTP method is *not* persisted to the
database; it is kept only in the in-memory buffer to distinguish HEAD vs GET for the
recent-logs feed."""
global file_access_temp, folder_today, folder_yesterday
http_method = (method or "GET").upper()
# Create a timezone-aware timestamp
now = datetime.now(timezone.utc).astimezone()
@ -192,6 +195,7 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id,
# Finally, insert the new access at the top of the temp log
# Keep existing columns stable; append raw geo data for map use.
# Keep method only in memory for the 10-minute feed (DB remains untouched by method).
file_access_temp.insert(0, [
iso_ts, # 0 timestamp
rel_path, # 1 path
@ -204,7 +208,8 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id,
city, # 8 city
country, # 9 country
lat, # 10 latitude
lon # 11 longitude
lon, # 11 longitude
http_method # 12 http method (in-memory only)
])
return True
@ -263,6 +268,25 @@ def return_file_access():
".ogg", ".wma", ".aiff", ".alac", ".opus"
))
def is_get(entry):
    """Keep HEAD-prefetch noise out of the recent feed.

    The HTTP method is read from index 12 of the entry. Entries that are too
    short to carry a method, or whose method slot is empty, are treated as GET.
    Returns True for every method except HEAD.
    """
    recorded = entry[12] if len(entry) > 12 else None
    # A missing or empty method slot falls back to GET.
    method = str(recorded).upper() if recorded else "GET"
    return method != "HEAD"
def has_bytes(entry):
    """Tell whether an entry transferred more than zero bytes.

    Zero-byte requests (e.g., Apple prefetches asking for 0 bytes) are ignored.
    The size is read from index 2 of the entry and may be numeric or a numeric
    string. Anything missing, None, or unparsable counts as "no bytes".
    """
    try:
        raw_size = entry[2]
        # float() accepts both numbers and numeric strings.
        return raw_size is not None and float(raw_size) > 0
    except Exception:
        # Best-effort filter: any lookup or conversion failure means "skip".
        return False
if not file_access_temp:
return []
@ -273,8 +297,11 @@ def return_file_access():
entry for entry in file_access_temp
if datetime.fromisoformat(entry[0]) >= cutoff_time
]
# Only expose audio file accesses to the UI
return [entry for entry in file_access_temp if is_audio(entry)]
audio_entries = [
entry for entry in file_access_temp
if is_audio(entry) and is_get(entry) and has_bytes(entry)
]
return audio_entries
def return_file_access_with_geo():

32
app.py
View File

@ -781,12 +781,36 @@ def serve_file(subpath):
is_audio_get = mime.startswith('audio/') and request.method == 'GET'
ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
range_header = request.headers.get('Range', '')
def is_range_prefetch(header, ua, max_length=1024):
    """
    Detect tiny range requests (common Apple prefetch) so we can skip logging duplicates.

    A header counts as a prefetch only when it is a single, fully bounded
    ``bytes=<start>-<end>`` range that starts at offset 0 and spans at most
    ``max_length`` bytes. Open-ended (``bytes=0-``), suffix (``bytes=-500``)
    and multi-range headers are never treated as prefetches.

    Args:
        header: Raw value of the ``Range`` request header; may be empty or None.
        ua: User-Agent of the client. Not consulted by the check; the
            parameter is kept so existing callers keep working.
        max_length: Largest range size, in bytes, still considered a prefetch.

    Returns:
        True if the header describes a tiny range starting at byte 0,
        otherwise False.
    """
    if not header or not isinstance(header, str):
        return False

    unit, eq, range_spec = header.partition('=')
    if not eq or unit.lower() != 'bytes':
        return False

    start_str, dash, end_str = range_spec.partition('-')
    if not dash or not (start_str.isdigit() and end_str.isdigit()):
        # No dash, an empty bound, or extra ranges after a comma.
        return False

    try:
        start, end = int(start_str), int(end_str)
    except ValueError:
        # str.isdigit() accepts characters (e.g. superscripts) that int() rejects.
        return False

    # The range is inclusive on both ends, hence the +1.
    return start == 0 and end - start + 1 <= max_length
# Logging: log every client GET (cached or not), but skip CDN prefetches (X-Cache-Request)
# and HEAD probes to avoid double-counting.
# and HEAD probes to avoid double-counting. Also skip tiny range-prefetches (e.g., Apple).
do_log = (
not is_cache_request # skip if upstream CDN asked us to cache
and request.method != 'HEAD'
and not is_range_prefetch(range_header, user_agent)
)
# 3) Pick cache
@ -869,7 +893,8 @@ def serve_file(subpath):
ip_address,
user_agent,
session['device_id'],
cached_hit
cached_hit,
request.method
)
return response
@ -1023,7 +1048,8 @@ def serve_file(subpath):
ip_address,
user_agent,
session['device_id'],
cached_hit
cached_hit,
request.method
)
return response