diff --git a/index_for_search.py b/index_for_search.py index f6ba0ed..a5c0cb5 100755 --- a/index_for_search.py +++ b/index_for_search.py @@ -4,6 +4,7 @@ import sqlite3 from datetime import datetime import re import helperfunctions as hf +from collections import defaultdict SEARCH_DB_NAME = 'search.db' ACCESS_LOG_DB_NAME = 'access_log.db' @@ -17,6 +18,10 @@ search_db.row_factory = sqlite3.Row access_log_db = sqlite3.connect(f'file:{ACCESS_LOG_DB_NAME}?mode=ro', uri=True) access_log_db.row_factory = sqlite3.Row +def log(message: str): + """Small helper to ensure console output is flushed immediately.""" + print(message, flush=True) + def init_db(): """Initializes the database with the required schema.""" cursor = search_db.cursor() @@ -54,13 +59,16 @@ def scan_dir(directory): with os.scandir(directory) as it: for entry in it: if entry.is_dir(follow_symlinks=False): - # Skip transcription directories immediately. - if entry.name.lower() == "transkription": + # Skip unwanted directories immediately. + if entry.name.startswith(('.', '@', '#')): + continue + if entry.name.lower() in {"transkription", ".app", "#recycle"}: continue yield from scan_dir(entry.path) elif entry.is_file(follow_symlinks=False): yield entry except PermissionError: + log(f"Permission denied: {directory}") return def get_hit_count(relative_path): @@ -71,8 +79,65 @@ def get_hit_count(relative_path): return row["hit_count"] if row else 0 +def get_hit_counts_for_basefolder(basefolder: str) -> dict: + """Return a map of rel_path -> hit_count for all files under a basefolder.""" + cursor = access_log_db.cursor() + pattern = f"{basefolder}/%" + cursor.execute( + "SELECT rel_path, COUNT(*) AS hit_count FROM file_access_log WHERE rel_path LIKE ? GROUP BY rel_path", + (pattern,) + ) + return {row["rel_path"]: row["hit_count"] for row in cursor.fetchall()} + + +def log_structure(root_path, max_depth=None, show_files=False): + """ + Log folder structure up to max_depth levels (root = depth 1). 
+    If max_depth is None, traverse all depths. Files are logged only when show_files is True. + """ + depth_label = "all" if max_depth is None else f"<= {max_depth}" + log(f"Folder structure (depth {depth_label}) for '{root_path}':") + + def _walk(path, depth): + if max_depth is not None and depth > max_depth: + return + try: + with os.scandir(path) as it: + entries = sorted(it, key=lambda e: (not e.is_dir(follow_symlinks=False), e.name.lower())) + for entry in entries: + if entry.is_dir(follow_symlinks=False): + if entry.name.startswith(('.', '@', '#')): + continue + if entry.name.lower() in {"transkription"}: + continue + indent = " " * (depth - 1) + log(f"{indent}- {entry.name}/") + _walk(entry.path, depth + 1) + elif show_files: + indent = " " * (depth - 1) + log(f"{indent}- {entry.name}") + except PermissionError: + indent = " " * (depth - 1) + log(f"{indent}- [permission denied]") + + _walk(root_path, depth=1) + + +def log_file(relative_path: str, filename: str): + """Debug helper to log each file that is indexed.""" + log(f" file: {relative_path} ({filename})") + + +def log_directory_batch(directory: str, files: list[str]): + """Log file count for a directory without listing filenames.""" + if not files: + return + log(f" Dir {directory or '/'}: {len(files)} files") + + def updatefileindex(): cursor = search_db.cursor() + totals = {"folders": 0, "scanned": 0, "deleted": 0} # Load folder configuration from JSON file. with open(FOLDER_CONFIG, "r", encoding="utf-8") as f: @@ -81,16 +146,21 @@ def updatefileindex(): # Process each configured base folder. 
for config in config_data: for folder in config.get("folders", []): + totals["folders"] += 1 foldername = folder.get("foldername") - print(f"Processing folder: {foldername}") + log(f"Processing folder: {foldername}") raw_folderpath = folder.get("folderpath") norm_folderpath = os.path.normpath(raw_folderpath) + log_structure(norm_folderpath, max_depth=None, show_files=False) # Precompute the length of the base folder path (plus one for the separator) base_len = len(norm_folderpath) + 1 + # Prefetch hit counts for this basefolder to avoid per-file queries + hitcount_map = get_hit_counts_for_basefolder(foldername) # Accumulate scanned file data and keys for this base folder. scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount) current_keys = set() + dir_files = defaultdict(list) # map of directory -> list of filenames for entry in scan_dir(norm_folderpath): transcript = None entry_path = os.path.normpath(entry.path) @@ -103,12 +173,8 @@ def updatefileindex(): relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/') filetype = os.path.splitext(entry.name)[1].lower() - if filetype not in ['.mp3', '.wav', '.ogg', '.m4a', '.flac']: - # Skip non-audio files. - continue - - # Retrieve the hit count for this file. - hit_count = get_hit_count(relative_path) + # Retrieve the hit count for this file from pre-fetched map. 
+ hit_count = hitcount_map.get(relative_path, 0) # Determine the site if foldername == 'Gottesdienste Speyer': @@ -134,15 +200,24 @@ def updatefileindex(): performance_date = hf.extract_date_from_string(relative_path) + # Debug: batch file logging per directory + dir_files[os.path.dirname(relative_path)].append(entry.name) + scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count)) current_keys.add((relative_path, entry.name)) + # After scanning, log grouped files per directory + for d, files in dir_files.items(): + log_directory_batch(d, files) + # Remove database entries for files under this base folder that are no longer on disk. pattern = foldername + os.sep + '%' cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,)) db_rows = cursor.fetchall() keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows) keys_to_delete = keys_in_db - current_keys + deleted_count = len(keys_to_delete) + totals["deleted"] += deleted_count for key in keys_to_delete: cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key) @@ -154,7 +229,11 @@ def updatefileindex(): # Commit changes after processing this base folder. 
search_db.commit() + folder_scanned = len(scanned_files) + totals["scanned"] += folder_scanned + log(f"Indexed {folder_scanned} files (deleted {deleted_count}) in '{foldername}'") + log(f"Index update finished: folders={totals['folders']}, files indexed={totals['scanned']}, removed={totals['deleted']}") return "File index updated successfully" def convert_dates(search_db, diff --git a/search.py b/search.py index 2550c46..3ea090b 100644 --- a/search.py +++ b/search.py @@ -12,12 +12,23 @@ search_db.row_factory = sqlite3.Row with open("app_config.json", 'r') as file: app_config = json.load(file) +FILETYPE_GROUPS = { + 'audio': ('.mp3', '.wav', '.ogg', '.m4a', '.flac'), + 'video': ('.mp4', '.mov', '.mkv', '.avi', '.webm'), + 'image': ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff') +} +ALL_GROUP_EXTS = tuple(sorted({ext for group in FILETYPE_GROUPS.values() for ext in group})) + def searchcommand(): query = request.form.get("query", "").strip() category = request.form.get("category", "").strip() searchfolder = request.form.get("folder", "").strip() datefrom = request.form.get("datefrom", "").strip() dateto = request.form.get("dateto", "").strip() + filetypes = [ft.strip().lower() for ft in request.form.getlist("filetype") if ft.strip()] + if not filetypes: + # Default to audio when nothing selected + filetypes = ['audio'] include_transcript = request.form.get("includeTranscript") in ["true", "on"] words = [w for w in query.split() if w] @@ -72,6 +83,25 @@ def searchcommand(): if datefrom or dateto: conditions.append("performance_date IS NOT NULL") + # Filetype filters (multiple selection) + selected_groups = [ft for ft in filetypes if ft in FILETYPE_GROUPS] + include_other = 'other' in filetypes + + # If not all groups selected, apply filter + if set(filetypes) != {'audio', 'video', 'image', 'other'}: + clauses = [] + if selected_groups: + ext_list = tuple({ext for g in selected_groups for ext in FILETYPE_GROUPS[g]}) + placeholders = ",".join("?" 
for _ in ext_list) + clauses.append(f"filetype IN ({placeholders})") + params.extend(ext_list) + if include_other: + placeholders = ",".join("?" for _ in ALL_GROUP_EXTS) + clauses.append(f"(filetype IS NULL OR filetype = '' OR filetype NOT IN ({placeholders}))") + params.extend(ALL_GROUP_EXTS) + if clauses: + conditions.append("(" + " OR ".join(clauses) + ")") + # Build and execute SQL sql = "SELECT * FROM files" if conditions: diff --git a/static/app.css b/static/app.css index 29d4b47..f9a4604 100644 --- a/static/app.css +++ b/static/app.css @@ -1169,3 +1169,10 @@ footer .audio-player-container { color: var(--brand-ink); border-color: var(--brand-navy); } + +/* Highlight a file when opened from search */ +.search-highlight { + outline: 2px solid #f6c344; + background-color: rgba(246, 195, 68, 0.25); + border-radius: 4px; +} diff --git a/static/search.js b/static/search.js index 4299780..bb716df 100644 --- a/static/search.js +++ b/static/search.js @@ -5,17 +5,26 @@ document.addEventListener('DOMContentLoaded', function() { const resultsDiv = document.getElementById('results'); resultsDiv.innerHTML = '
${fileAction}
+Anzahl Downloads: ${file.hitcount}
${ file.performance_date !== undefined ? `Datum: ${file.performance_date}
` : ``} ${ file.transcript_hits !== undefined ? `Treffer im Transkript: ${file.transcript_hits}
` : ``} @@ -24,6 +33,7 @@ document.addEventListener('DOMContentLoaded', function() { resultsDiv.appendChild(card); }); attachEventListeners(); + attachSearchFolderButtons(); } else { resultsDiv.innerHTML = 'No results found.
'; } @@ -55,6 +65,22 @@ document.addEventListener('DOMContentLoaded', function() { } } + // Restore previously selected filetypes (multi-select). Default to audio if none stored. + const previousFiletypes = localStorage.getItem("searchFiletypes"); + if (previousFiletypes) { + try { + const list = JSON.parse(previousFiletypes); + document.querySelectorAll('input[name=\"filetype\"]').forEach(cb => { + cb.checked = list.includes(cb.value); + }); + } catch (e) { + console.error('Error parsing stored filetypes', e); + document.getElementById('filetype-audio').checked = true; + } + } else { + document.getElementById('filetype-audio').checked = true; + } + // Restore the checkbox state for "Im Transkript suchen" const previousIncludeTranscript = localStorage.getItem("searchIncludeTranscript"); if (previousIncludeTranscript !== null) { @@ -74,6 +100,15 @@ document.addEventListener('DOMContentLoaded', function() { const categoryRadio = document.querySelector('input[name="category"]:checked'); const category = categoryRadio ? categoryRadio.value : ''; + // Get selected filetypes (allow multiple). Default to audio if none selected. 
+ const filetypeCheckboxes = document.querySelectorAll('input[name=\"filetype\"]'); + let filetypes = Array.from(filetypeCheckboxes).filter(cb => cb.checked).map(cb => cb.value); + if (filetypes.length === 0) { + // enforce audio as default when user unchecked all + document.getElementById('filetype-audio').checked = true; + filetypes = ['audio']; + } + // Prevent accidental re-selection of already selected radio buttons const radios = document.querySelectorAll('input[name="category"]'); radios.forEach(radio => { @@ -98,6 +133,7 @@ document.addEventListener('DOMContentLoaded', function() { formData.append('datefrom', document.getElementById('datefrom').value); formData.append('dateto', document.getElementById('dateto').value); formData.append('includeTranscript', includeTranscript); + filetypes.forEach(ft => formData.append('filetype', ft)); const settleSpinner = () => { clearTimeout(spinnerTimer); @@ -121,6 +157,7 @@ document.addEventListener('DOMContentLoaded', function() { // Save the search word, selected category, and checkbox state in localStorage localStorage.setItem("searchQuery", query); localStorage.setItem("searchCategory", category); + localStorage.setItem("searchFiletypes", JSON.stringify(filetypes)); localStorage.setItem("searchIncludeTranscript", includeTranscript); }) .catch(error => { @@ -140,6 +177,7 @@ document.addEventListener('DOMContentLoaded', function() { localStorage.removeItem("searchResponse"); localStorage.removeItem("searchQuery"); localStorage.removeItem("searchCategory"); + localStorage.removeItem("searchFiletypes"); localStorage.removeItem("folder"); localStorage.removeItem("datefrom"); localStorage.removeItem("dateto"); @@ -149,6 +187,9 @@ document.addEventListener('DOMContentLoaded', function() { document.querySelector('input[name="category"][value=""]').checked = true; const otherRadios = document.querySelectorAll('input[name="category"]:not([value=""])'); otherRadios.forEach(radio => radio.checked = false); + 
document.getElementById('filetype-audio').checked = true; + const otherFiletypeBoxes = document.querySelectorAll('input[name=\"filetype\"]:not([value=\"audio\"])'); + otherFiletypeBoxes.forEach(cb => cb.checked = false); document.getElementById('folder').value = ''; // Reset to "Alle" document.getElementById('datefrom').value = ''; // Reset date from document.getElementById('dateto').value = ''; // Reset date to @@ -164,6 +205,30 @@ document.addEventListener('DOMContentLoaded', function() { }); }); +function attachSearchFolderButtons() { + document.querySelectorAll('.folder-open-btn').forEach(btn => { + btn.addEventListener('click', (e) => { + e.preventDefault(); + const folder = btn.dataset.folder; + const file = btn.dataset.file; + openFolderAndHighlight(folder, file); + }); + }); +} + +function openFolderAndHighlight(folderPath, filePath) { + const targetFolder = folderPath || ''; + // Switch back to main view before loading folder + viewMain(); + loadDirectory(targetFolder).then(() => { + const target = document.querySelector(`.play-file[data-url=\"${filePath}\"]`); + if (target) { + target.classList.add('search-highlight'); + target.scrollIntoView({ behavior: 'smooth', block: 'center' }); + } + }); +} + function syncThemeColor() { // read the CSS variable from :root (or any selector) const cssVar = getComputedStyle(document.documentElement) diff --git a/templates/app.html b/templates/app.html index d4084f4..2df64f5 100644 --- a/templates/app.html +++ b/templates/app.html @@ -126,6 +126,32 @@