add hitcount to search index

lelo 2025-04-05 09:19:33 +02:00
parent 1fc51d578e
commit 5ae6a1dea2
3 changed files with 32 additions and 7 deletions

.gitignore

@@ -7,6 +7,7 @@
 /postgres_data
 /instance
 /__pycache__
+/search.db
 /access_log.db
 /access_log.db.bak
 /folder_config.json

(search indexer script; filename not shown)

@@ -3,13 +3,20 @@ import json
 import sqlite3
 
 SEARCH_DB_NAME = 'search.db'
+ACCESS_LOG_DB_NAME = 'access_log.db'
 
+# Connect to the search database.
 search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
 search_db.row_factory = sqlite3.Row
+
+# Open access_log.db in read-only mode.
+access_log_db = sqlite3.connect(f'file:{ACCESS_LOG_DB_NAME}?mode=ro', uri=True)
+access_log_db.row_factory = sqlite3.Row
 
 def init_db():
     """Initializes the database with the required schema."""
     cursor = search_db.cursor()
+    # Create table with the new 'hitcount' column.
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS files (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -17,10 +24,18 @@ def init_db():
             filename TEXT,
             filetype TEXT,
             transcript TEXT,
+            hitcount INTEGER DEFAULT 0,
             UNIQUE(relative_path, filename)
         )
     ''')
     search_db.commit()
+    # If the table already existed, try to add the 'hitcount' column.
+    try:
+        cursor.execute("ALTER TABLE files ADD COLUMN hitcount INTEGER DEFAULT 0")
+    except sqlite3.OperationalError:
+        # Likely the column already exists, so we ignore this error.
+        pass
+    search_db.commit()
 
 def scan_dir(directory):
     """Recursively scan directories using os.scandir for improved performance."""
@@ -37,6 +52,13 @@ def scan_dir(directory):
     except PermissionError:
         return
 
+def get_hit_count(relative_path):
+    """Returns the hit count for a given file from the access log database."""
+    cursor = access_log_db.cursor()
+    cursor.execute("SELECT COUNT(*) AS hit_count FROM file_access_log WHERE rel_path = ?", (relative_path,))
+    row = cursor.fetchone()
+    return row["hit_count"] if row else 0
+
 def updatefileindex():
     cursor = search_db.cursor()
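get_hit_count only assumes that access_log.db exposes a file_access_log table with a rel_path column; the rest of that schema is not shown in this commit. A minimal sketch of a compatible table, useful for testing the query in isolation (the accessed_at column is an assumption for illustration):

import sqlite3

db = sqlite3.connect(":memory:")
db.row_factory = sqlite3.Row
db.execute("""
    CREATE TABLE file_access_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        rel_path TEXT,
        accessed_at TEXT DEFAULT CURRENT_TIMESTAMP  -- assumed column, not in this diff
    )
""")
db.execute("INSERT INTO file_access_log (rel_path) VALUES (?)", ("docs/readme.txt",))
# Same COUNT(*) shape as get_hit_count() above.
row = db.execute(
    "SELECT COUNT(*) AS hit_count FROM file_access_log WHERE rel_path = ?",
    ("docs/readme.txt",),
).fetchone()
print(row["hit_count"])  # -> 1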
@@ -54,7 +76,7 @@ def updatefileindex():
         base_len = len(norm_folderpath) + 1
 
         # Accumulate scanned file data and keys for this base folder.
-        scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
+        scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript, hitcount)
         current_keys = set()
 
         for entry in scan_dir(norm_folderpath):
@@ -66,7 +88,6 @@
             rel_part = os.path.relpath(entry_path, norm_folderpath)
             # Prepend the foldername so it becomes part of the stored relative path.
             relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
-            print(relative_path)
 
             filetype = os.path.splitext(entry.name)[1].lower()
             transcript = None
@@ -82,7 +103,10 @@
                 except Exception:
                     transcript = None
 
-            scanned_files.append((relative_path, entry.name, filetype, transcript))
+            # Retrieve the hit count for this file.
+            hit_count = get_hit_count(relative_path)
+            scanned_files.append((relative_path, entry.name, filetype, transcript, hit_count))
             current_keys.add((relative_path, entry.name))
 
         # Remove database entries for files under this base folder that are no longer on disk.
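Since get_hit_count() runs one query per scanned file, a large index means many round trips into access_log.db. A possible alternative, sketched below and not part of this commit, is to fetch all counts once with GROUP BY and look them up from a dict inside the loop (load_hit_counts is a hypothetical helper):

def load_hit_counts():
    """Map rel_path -> number of access-log rows, fetched in a single query."""
    cursor = access_log_db.cursor()
    cursor.execute(
        "SELECT rel_path, COUNT(*) AS hit_count FROM file_access_log GROUP BY rel_path"
    )
    return {row["rel_path"]: row["hit_count"] for row in cursor.fetchall()}

# In updatefileindex(), before the scan loop:
#     hit_counts = load_hit_counts()
# and per file:
#     hit_count = hit_counts.get(relative_path, 0)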
@@ -96,7 +120,7 @@
 
         # Bulk write the scanned files using INSERT OR REPLACE.
         cursor.executemany(
-            "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
+            "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?)",
             scanned_files
         )
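Worth noting about the bulk write above: INSERT OR REPLACE resolves the UNIQUE(relative_path, filename) conflict by deleting the old row and inserting a fresh one, so a file's id changes on every index run. If stable ids ever matter, an upsert is one alternative; a sketch assuming SQLite 3.24 or newer:

cursor.executemany(
    """
    INSERT INTO files (relative_path, filename, filetype, transcript, hitcount)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT(relative_path, filename) DO UPDATE SET
        filetype   = excluded.filetype,
        transcript = excluded.transcript,
        hitcount   = excluded.hitcount
    """,
    scanned_files
)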
@@ -105,9 +129,9 @@
     return "File index updated successfully"
 
 if __name__ == "__main__":
     init_db()            # Initialize the database schema if it doesn't exist
     updatefileindex()    # Update the file index
-    search_db.close()    # Close the database connection
-    print("Database connection closed.")
+    search_db.close()        # Close the search database connection
+    access_log_db.close()    # Close the access log connection
+    print("Database connections closed.")

search.db

Binary file not shown.