diff --git a/.gitignore b/.gitignore
index c09317f..5ecf119 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 /postgres_data
 /instance
 /__pycache__
+/search.db
 /access_log.db
 /access_log.db.bak
 /folder_config.json
diff --git a/index_for_search.py b/index_for_search.py
index c79511d..0edd825 100644
--- a/index_for_search.py
+++ b/index_for_search.py
@@ -3,13 +3,20 @@ import json
 import sqlite3
 
 SEARCH_DB_NAME = 'search.db'
+ACCESS_LOG_DB_NAME = 'access_log.db'
 
+# Connect to the search database.
 search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
 search_db.row_factory = sqlite3.Row
 
+# Open access_log.db in read-only mode.
+access_log_db = sqlite3.connect(f'file:{ACCESS_LOG_DB_NAME}?mode=ro', uri=True)
+access_log_db.row_factory = sqlite3.Row
+
 def init_db():
     """Initializes the database with the required schema."""
     cursor = search_db.cursor()
+    # Create table with the new 'hitcount' column.
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS files (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -17,10 +24,18 @@ def init_db():
             filename TEXT,
             filetype TEXT,
             transcript TEXT,
+            hitcount INTEGER DEFAULT 0,
             UNIQUE(relative_path, filename)
         )
     ''')
     search_db.commit()
+    # If the table already existed, try to add the 'hitcount' column.
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN hitcount INTEGER DEFAULT 0")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    search_db.commit()
 
 def scan_dir(directory):
     """Recursively scan directories using os.scandir for improved performance."""
@@ -37,6 +52,13 @@ def scan_dir(directory):
     except PermissionError:
         return
 
+def get_hit_count(relative_path):
+    """Returns the hit count for a given file from the access log database."""
+    cursor = access_log_db.cursor()
+    cursor.execute("SELECT COUNT(*) AS hit_count FROM file_access_log WHERE rel_path = ?", (relative_path,))
+    row = cursor.fetchone()
+    return row["hit_count"] if row else 0
+
 def updatefileindex():
     cursor = search_db.cursor()
 
@@ -54,7 +76,7 @@ def updatefileindex():
             base_len = len(norm_folderpath) + 1
 
             # Accumulate scanned file data and keys for this base folder.
-            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
+            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript, hitcount)
             current_keys = set()
 
             for entry in scan_dir(norm_folderpath):
@@ -66,7 +88,6 @@ def updatefileindex():
                         rel_part = os.path.relpath(entry_path, norm_folderpath)
                 # Prepend the foldername so it becomes part of the stored relative path.
                 relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
-                print(relative_path)
 
                 filetype = os.path.splitext(entry.name)[1].lower()
                 transcript = None
@@ -82,7 +103,10 @@ def updatefileindex():
                     except Exception:
                         transcript = None
 
-                scanned_files.append((relative_path, entry.name, filetype, transcript))
+                # Retrieve the hit count for this file.
+                hit_count = get_hit_count(relative_path)
+
+                scanned_files.append((relative_path, entry.name, filetype, transcript, hit_count))
                 current_keys.add((relative_path, entry.name))
 
             # Remove database entries for files under this base folder that are no longer on disk.
@@ -96,7 +120,7 @@ def updatefileindex():
 
             # Bulk write the scanned files using INSERT OR REPLACE.
             cursor.executemany(
-                "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
+                "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?)",
                 scanned_files
             )
 
@@ -105,9 +129,9 @@ def updatefileindex():
 
     return "File index updated successfully"
 
-
 if __name__ == "__main__":
     init_db()  # Initialize the database schema if it doesn't exist
     updatefileindex()  # Update the file index
-    search_db.close()  # Close the database connection
-    print("Database connection closed.")
\ No newline at end of file
+    search_db.close()  # Close the search database connection
+    access_log_db.close()  # Close the access log connection
+    print("Database connections closed.")
diff --git a/search.db b/search.db
deleted file mode 100644
index 2ef6f3b..0000000
Binary files a/search.db and /dev/null differ