add hitcount to search index
This commit is contained in:
parent
1fc51d578e
commit
5ae6a1dea2
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,6 +7,7 @@
|
||||
/postgres_data
|
||||
/instance
|
||||
/__pycache__
|
||||
/search.db
|
||||
/access_log.db
|
||||
/access_log.db.bak
|
||||
/folder_config.json
|
||||
|
||||
@ -3,13 +3,20 @@ import json
|
||||
import sqlite3
|
||||
|
||||
SEARCH_DB_NAME = 'search.db'
|
||||
ACCESS_LOG_DB_NAME = 'access_log.db'
|
||||
|
||||
# Connect to the search database.
|
||||
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
|
||||
search_db.row_factory = sqlite3.Row
|
||||
|
||||
# Open access_log.db in read-only mode.
|
||||
access_log_db = sqlite3.connect(f'file:{ACCESS_LOG_DB_NAME}?mode=ro', uri=True)
|
||||
access_log_db.row_factory = sqlite3.Row
|
||||
|
||||
def init_db():
|
||||
"""Initializes the database with the required schema."""
|
||||
cursor = search_db.cursor()
|
||||
# Create table with the new 'hitcount' column.
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@ -17,10 +24,18 @@ def init_db():
|
||||
filename TEXT,
|
||||
filetype TEXT,
|
||||
transcript TEXT,
|
||||
hitcount INTEGER DEFAULT 0,
|
||||
UNIQUE(relative_path, filename)
|
||||
)
|
||||
''')
|
||||
search_db.commit()
|
||||
# If the table already existed, try to add the 'hitcount' column.
|
||||
try:
|
||||
cursor.execute("ALTER TABLE files ADD COLUMN hitcount INTEGER DEFAULT 0")
|
||||
except sqlite3.OperationalError:
|
||||
# Likely the column already exists, so we ignore this error.
|
||||
pass
|
||||
search_db.commit()
|
||||
|
||||
def scan_dir(directory):
|
||||
"""Recursively scan directories using os.scandir for improved performance."""
|
||||
@ -37,6 +52,13 @@ def scan_dir(directory):
|
||||
except PermissionError:
|
||||
return
|
||||
|
||||
def get_hit_count(relative_path):
    """Look up how many access-log entries exist for a single file.

    Counts the rows of ``file_access_log`` in the access-log database whose
    ``rel_path`` equals ``relative_path``. Falls back to 0 when the query
    yields no row.
    """
    query = "SELECT COUNT(*) AS hit_count FROM file_access_log WHERE rel_path = ?"
    log_cursor = access_log_db.cursor()
    log_cursor.execute(query, (relative_path,))
    result = log_cursor.fetchone()
    if not result:
        return 0
    return result["hit_count"]
|
||||
|
||||
def updatefileindex():
|
||||
cursor = search_db.cursor()
|
||||
|
||||
@ -54,7 +76,7 @@ def updatefileindex():
|
||||
base_len = len(norm_folderpath) + 1
|
||||
|
||||
# Accumulate scanned file data and keys for this base folder.
|
||||
scanned_files = [] # Each entry: (relative_path, filename, filetype, transcript)
|
||||
scanned_files = [] # Each entry: (relative_path, filename, filetype, transcript, hitcount)
|
||||
current_keys = set()
|
||||
|
||||
for entry in scan_dir(norm_folderpath):
|
||||
@ -66,7 +88,6 @@ def updatefileindex():
|
||||
rel_part = os.path.relpath(entry_path, norm_folderpath)
|
||||
# Prepend the foldername so it becomes part of the stored relative path.
|
||||
relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
|
||||
print(relative_path)
|
||||
filetype = os.path.splitext(entry.name)[1].lower()
|
||||
transcript = None
|
||||
|
||||
@ -82,7 +103,10 @@ def updatefileindex():
|
||||
except Exception:
|
||||
transcript = None
|
||||
|
||||
scanned_files.append((relative_path, entry.name, filetype, transcript))
|
||||
# Retrieve the hit count for this file.
|
||||
hit_count = get_hit_count(relative_path)
|
||||
|
||||
scanned_files.append((relative_path, entry.name, filetype, transcript, hit_count))
|
||||
current_keys.add((relative_path, entry.name))
|
||||
|
||||
# Remove database entries for files under this base folder that are no longer on disk.
|
||||
@ -96,7 +120,7 @@ def updatefileindex():
|
||||
|
||||
# Bulk write the scanned files using INSERT OR REPLACE.
|
||||
cursor.executemany(
|
||||
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
|
||||
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript, hitcount) VALUES (?, ?, ?, ?, ?)",
|
||||
scanned_files
|
||||
)
|
||||
|
||||
@ -105,9 +129,9 @@ def updatefileindex():
|
||||
|
||||
return "File index updated successfully"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_db() # Initialize the database schema if it doesn't exist
|
||||
updatefileindex() # Update the file index
|
||||
search_db.close() # Close the database connection
|
||||
print("Database connection closed.")
|
||||
search_db.close() # Close the search database connection
|
||||
access_log_db.close() # Close the access log connection
|
||||
print("Database connections closed.")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user