From 495f33aa6837f8f96819bd66b880155c023b10d7 Mon Sep 17 00:00:00 2001 From: lelo Date: Thu, 25 Dec 2025 22:17:50 +0000 Subject: [PATCH] exclude tiny range requests from logging --- analytics.py | 37 ++++++++++++++++++++++++++++++++----- app.py | 32 +++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/analytics.py b/analytics.py index ff6428e..87b4346 100644 --- a/analytics.py +++ b/analytics.py @@ -114,10 +114,13 @@ def parse_timestamp(ts_str): # If it's some other ValueError, re-raise it. raise -def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached): +def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, cached, method="GET"): """Insert a file access record into the database and prune entries older than 10 minutes, - and track today’s files separately in folder_today.""" + and track today’s files separately in folder_today. HTTP method is *not* persisted to the + database; it is kept only in the in-memory buffer to distinguish HEAD vs GET for the + recent-logs feed.""" global file_access_temp, folder_today, folder_yesterday + http_method = (method or "GET").upper() # Create a timezone-aware timestamp now = datetime.now(timezone.utc).astimezone() @@ -192,6 +195,7 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, # Finally, insert the new access at the top of the temp log # Keep existing columns stable; append raw geo data for map use. + # Keep method only in memory for the 10-minute feed (DB remains untouched by method). 
file_access_temp.insert(0, [ iso_ts, # 0 timestamp rel_path, # 1 path @@ -204,7 +208,8 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id, city, # 8 city country, # 9 country lat, # 10 latitude - lon # 11 longitude + lon, # 11 longitude + http_method # 12 http method (in-memory only) ]) return True @@ -263,6 +268,25 @@ def return_file_access(): ".ogg", ".wma", ".aiff", ".alac", ".opus" )) + def is_get(entry): + """Exclude HEAD requests from the recent feed to avoid HEAD-prefetch noise; entries with no recorded method count as GET.""" + method_val = "GET" + if len(entry) > 12 and entry[12]: + method_val = str(entry[12]).upper() + return method_val != "HEAD" + + def has_bytes(entry): + """Ignore zero-byte requests (e.g., Apple prefetches asking for 0 bytes).""" + try: + size_val = entry[2] + if size_val is None: + return False + # handle str or numeric + size_num = float(size_val) + return size_num > 0 + except Exception: + return False + if not file_access_temp: return [] @@ -273,8 +297,11 @@ def return_file_access(): entry for entry in file_access_temp if datetime.fromisoformat(entry[0]) >= cutoff_time ] - # Only expose audio file accesses to the UI - return [entry for entry in file_access_temp if is_audio(entry)] + audio_entries = [ + entry for entry in file_access_temp + if is_audio(entry) and is_get(entry) and has_bytes(entry) + ] + return audio_entries def return_file_access_with_geo(): diff --git a/app.py b/app.py index 0dee807..6ac00f5 100755 --- a/app.py +++ b/app.py @@ -781,12 +781,36 @@ def serve_file(subpath): is_audio_get = mime.startswith('audio/') and request.method == 'GET' ip_address = request.remote_addr user_agent = request.headers.get('User-Agent') + range_header = request.headers.get('Range', '') + + def is_range_prefetch(header, ua): + """ + Detect tiny range requests (common Apple prefetch) so we can skip logging duplicates. 
+ """ + if not header: + return False + try: + if not header.lower().startswith('bytes='): + return False + range_spec = header.split('=', 1)[1] + start_str, end_str = range_spec.split('-', 1) + if not start_str.isdigit() or not end_str.isdigit(): + return False + start = int(start_str) + end = int(end_str) + length = end - start + 1 + if length <= 1024 and start == 0: + return True + except Exception: + return False + return False # Logging: log every client GET (cached or not), but skip CDN prefetches (X-Cache-Request) - # and HEAD probes to avoid double-counting. + # and HEAD probes to avoid double-counting. Also skip tiny range-prefetches (e.g., Apple). do_log = ( not is_cache_request # skip if upstream CDN asked us to cache and request.method != 'HEAD' + and not is_range_prefetch(range_header, user_agent) ) # 3) Pick cache @@ -869,7 +893,8 @@ def serve_file(subpath): ip_address, user_agent, session['device_id'], - cached_hit + cached_hit, + request.method ) return response @@ -1023,7 +1048,8 @@ def serve_file(subpath): ip_address, user_agent, session['device_id'], - cached_hit + cached_hit, + request.method ) return response