remove storing ip address

This commit is contained in:
lelo 2025-04-03 18:12:37 +02:00
parent 262a31e65a
commit a9b4b23b07
4 changed files with 104 additions and 20 deletions

View File

@ -13,6 +13,9 @@ DB_NAME = 'access_log.db'
# Create a single global connection to SQLite # Create a single global connection to SQLite
log_db = sqlite3.connect(DB_NAME, check_same_thread=False) log_db = sqlite3.connect(DB_NAME, check_same_thread=False)
# geo location
geoReader = geoip2.database.Reader('GeoLite2-City.mmdb')
def init_log_db(): def init_log_db():
"""Create the file_access_log table if it doesn't already exist.""" """Create the file_access_log table if it doesn't already exist."""
with log_db: with log_db:
@ -23,7 +26,8 @@ def init_log_db():
rel_path TEXT, rel_path TEXT,
filesize INTEGER, filesize INTEGER,
mime TEXT, mime TEXT,
ip_address TEXT, city TEXT,
country TEXT,
user_agent TEXT, user_agent TEXT,
device_id TEXT, device_id TEXT,
cached BOOLEAN cached BOOLEAN
@ -32,9 +36,9 @@ def init_log_db():
init_log_db() init_log_db()
def lookup_location(ip, reader): def lookup_location(ip):
try: try:
response = reader.city(ip) response = geoReader.city(ip)
country = response.country.name if response.country.name else "Unknown" country = response.country.name if response.country.name else "Unknown"
city = response.city.name if response.city.name else "Unknown" city = response.city.name if response.city.name else "Unknown"
return country, city return country, city
@ -97,12 +101,15 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id,
timestamp = datetime.now(timezone.utc).astimezone() timestamp = datetime.now(timezone.utc).astimezone()
iso_ts = timestamp.isoformat() iso_ts = timestamp.isoformat()
# Convert the IP address to a location
city, country = lookup_location(ip_address)
with log_db: with log_db:
log_db.execute(''' log_db.execute('''
INSERT INTO file_access_log INSERT INTO file_access_log
(timestamp, rel_path, filesize, mime, ip_address, user_agent, device_id, cached) (timestamp, rel_path, filesize, mime, city, country, user_agent, device_id, cached)
VALUES (?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached)) ''', (iso_ts, rel_path, filesize, mime, city, country, user_agent, device_id, cached))
# Remove entries older than 10 minutes using our robust parser. # Remove entries older than 10 minutes using our robust parser.
cutoff_time = datetime.now(timezone.utc).astimezone() - timedelta(minutes=10) cutoff_time = datetime.now(timezone.utc).astimezone() - timedelta(minutes=10)
@ -112,7 +119,7 @@ def log_file_access(rel_path, filesize, mime, ip_address, user_agent, device_id,
] ]
# Add the new entry at the beginning of the list # Add the new entry at the beginning of the list
file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, ip_address, user_agent, device_id, cached]) file_access_temp.insert(0, [iso_ts, rel_path, filesize, mime, f"{city}, {country}", user_agent, device_id, cached])
return True return True
def return_file_access(): def return_file_access():
@ -331,17 +338,17 @@ def dashboard():
folder_data.sort(key=lambda x: x['count'], reverse=True) folder_data.sort(key=lambda x: x['count'], reverse=True)
folder_data = folder_data[:10] folder_data = folder_data[:10]
# 6. Aggregate IP addresses with counts # 6. Aggregate locations with counts
query = f''' query = f'''
SELECT ip_address, COUNT(*) as count SELECT city, country, COUNT(*) as count
FROM file_access_log FROM file_access_log
WHERE timestamp >= ? {filetype_filter_sql} WHERE timestamp >= ? {filetype_filter_sql}
GROUP BY ip_address GROUP BY city
ORDER BY count DESC ORDER BY count DESC
''' '''
with log_db: with log_db:
cursor = log_db.execute(query, params_for_filter) cursor = log_db.execute(query, params_for_filter)
ip_rows = cursor.fetchall() locations = cursor.fetchall()
# 7. Summary stats # 7. Summary stats
# total_accesses # total_accesses
@ -387,14 +394,11 @@ def dashboard():
if cached_percentage is not None: if cached_percentage is not None:
cached_percentage = f"{cached_percentage:.2f}" cached_percentage = f"{cached_percentage:.2f}"
# 8. Process location data with GeoIP2 # 8. Process location data
reader = geoip2.database.Reader('GeoLite2-City.mmdb')
location_data_dict = {} location_data_dict = {}
for (ip_addr, cnt) in ip_rows: for (city, country, cnt) in locations:
country, city = lookup_location(ip_addr, reader)
key = (country, city) key = (country, city)
location_data_dict[key] = location_data_dict.get(key, 0) + cnt location_data_dict[key] = location_data_dict.get(key, 0) + cnt
reader.close()
location_data = [ location_data = [
dict(country=k[0], city=k[1], count=v) dict(country=k[0], city=k[1], count=v)

4
app.py
View File

@ -368,7 +368,7 @@ def query_recent_connections():
'full_path': row[1], 'full_path': row[1],
'filesize': row[2], 'filesize': row[2],
'mime_typ': row[3], 'mime_typ': row[3],
'ip_address': row[4], 'location': row[4],
'user_agent': row[5], 'user_agent': row[5],
'cached': row[7] 'cached': row[7]
} }
@ -410,7 +410,7 @@ def handle_request_initial_data():
'full_path': row[1], 'full_path': row[1],
'filesize' : row[2], 'filesize' : row[2],
'mime_typ' : row[3], 'mime_typ' : row[3],
'ip_address': row[4], 'location': row[4],
'user_agent': row[5], 'user_agent': row[5],
'cached': row[7] 'cached': row[7]
} }

80
convert_ip.py Normal file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python3
import sqlite3
import geoip2.database
def get_location(ip_address, reader):
    """
    Resolve an IP address to a ``(city, country)`` tuple.

    ``reader`` is any object exposing ``city(ip)`` whose result has
    ``.city.name`` and ``.country.name`` attributes (here: the GeoLite2
    reader opened in ``main``).

    A missing city or country name is replaced by ``"Unknown"``. If the
    lookup itself fails for any reason, ``("Unknown", "Unknown")`` is
    returned, so callers can always unpack exactly two values.
    """
    try:
        response = reader.city(ip_address)
        city = response.city.name or "Unknown"
        country = response.country.name or "Unknown"
    except Exception:
        # Deliberate best-effort: one unresolvable or malformed address must
        # not abort the conversion of the whole table. The fallback has to be
        # a 2-tuple (not the string "Unknown, Unknown"), because the caller
        # unpacks the result into city and country.
        return "Unknown", "Unknown"
    return city, country
def main():
    """
    One-off migration: replace ``ip_address`` with ``city``/``country``.

    Reads every row of ``file_access_log`` in ``access_log.db``, resolves its
    stored IP address through ``GeoLite2-City.mmdb`` and writes the row into a
    freshly built ``file_access_log_new`` table, which then takes the place of
    the original table.

    The copy and the table swap run inside a single transaction, so an error
    part-way through rolls everything back and leaves the original table
    untouched. The GeoIP reader and the database connection are closed even
    when the migration fails.
    """
    # Initialize the GeoLite2 reader
    reader = geoip2.database.Reader('GeoLite2-City.mmdb')
    try:
        # Connect to your SQLite database (update the path if necessary)
        conn = sqlite3.connect('access_log.db')
        try:
            conn.row_factory = sqlite3.Row  # Enable name-based access to columns

            # ``with conn`` commits on success and rolls back on any exception.
            with conn:
                # Open the transaction explicitly so that the DDL statements
                # below are part of it as well (the sqlite3 module only opens
                # one implicitly before INSERT/UPDATE/DELETE/REPLACE).
                conn.execute('BEGIN')

                # A staging table left behind by an earlier, interrupted run
                # would otherwise be reused and end up with duplicated rows.
                conn.execute('DROP TABLE IF EXISTS file_access_log_new')

                # Create the new table (city/country instead of ip_address)
                conn.execute('''
                    CREATE TABLE file_access_log_new (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT,
                        rel_path TEXT,
                        filesize INTEGER,
                        mime TEXT,
                        city TEXT,
                        country TEXT,
                        user_agent TEXT,
                        device_id TEXT,
                        cached BOOLEAN
                    )
                ''')

                # Read all rows from the old table
                rows = conn.execute('SELECT * FROM file_access_log').fetchall()

                # Insert rows into the new table, converting IP addresses
                # to locations on the way.
                conn.executemany('''
                    INSERT INTO file_access_log_new
                        (timestamp, rel_path, filesize, mime, city, country,
                         user_agent, device_id, cached)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    (
                        row['timestamp'],
                        row['rel_path'],
                        row['filesize'],
                        row['mime'],
                        *get_location(row['ip_address'], reader),
                        row['user_agent'],
                        row['device_id'],
                        row['cached'],
                    )
                    for row in rows
                ))

                # Replace the old table with the new table
                conn.execute('DROP TABLE file_access_log')
                conn.execute(
                    'ALTER TABLE file_access_log_new RENAME TO file_access_log'
                )
        finally:
            # Clean up: close the database connection
            conn.close()
    finally:
        # Clean up: close the GeoIP reader
        reader.close()

    print("Database conversion complete.")


if __name__ == '__main__':
    main()

View File

@ -44,7 +44,7 @@
<thead class="table-secondary"> <thead class="table-secondary">
<tr> <tr>
<th>Timestamp</th> <th>Timestamp</th>
<th>IP Address</th> <th>Location</th>
<th>User Agent</th> <th>User Agent</th>
<th>File Path</th> <th>File Path</th>
<td>File Size</td> <td>File Size</td>
@ -77,7 +77,7 @@
const row = document.createElement('tr'); const row = document.createElement('tr');
row.innerHTML = ` row.innerHTML = `
<td>${record.timestamp}</td> <td>${record.timestamp}</td>
<td>${record.ip_address}</td> <td>${record.location}</td>
<td>${record.user_agent}</td> <td>${record.user_agent}</td>
<td>${record.full_path}</td> <td>${record.full_path}</td>
<td>${record.filesize}</td> <td>${record.filesize}</td>