index and search all files
This commit is contained in:
parent
f8e705ab20
commit
77d1672efb
@ -4,6 +4,7 @@ import sqlite3
|
||||
from datetime import datetime
|
||||
import re
|
||||
import helperfunctions as hf
|
||||
from collections import defaultdict
|
||||
|
||||
SEARCH_DB_NAME = 'search.db'
|
||||
ACCESS_LOG_DB_NAME = 'access_log.db'
|
||||
@ -17,6 +18,10 @@ search_db.row_factory = sqlite3.Row
|
||||
access_log_db = sqlite3.connect(f'file:{ACCESS_LOG_DB_NAME}?mode=ro', uri=True)
|
||||
access_log_db.row_factory = sqlite3.Row
|
||||
|
||||
def log(message: str):
    """Write ``message`` to the console without buffering delay.

    The text is printed to standard output and the stream is flushed
    right away, so progress lines show up while a long run is still going.
    """
    print(message, flush=True)
|
||||
|
||||
def init_db():
|
||||
"""Initializes the database with the required schema."""
|
||||
cursor = search_db.cursor()
|
||||
@ -54,13 +59,16 @@ def scan_dir(directory):
|
||||
with os.scandir(directory) as it:
|
||||
for entry in it:
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
# Skip transcription directories immediately.
|
||||
if entry.name.lower() == "transkription":
|
||||
# Skip unwanted directories immediately.
|
||||
if entry.name.startswith(('.', '@', '#')):
|
||||
continue
|
||||
if entry.name.lower() in {"transkription", ".app", "#recycle"}:
|
||||
continue
|
||||
yield from scan_dir(entry.path)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
yield entry
|
||||
except PermissionError:
|
||||
log(f"Permission denied: {directory}")
|
||||
return
|
||||
|
||||
def get_hit_count(relative_path):
|
||||
@ -71,8 +79,65 @@ def get_hit_count(relative_path):
|
||||
return row["hit_count"] if row else 0
|
||||
|
||||
|
||||
def get_hit_counts_for_basefolder(basefolder: str) -> dict:
    """Return a map of rel_path -> hit_count for all files under a basefolder.

    One grouped query against ``file_access_log`` replaces a per-file lookup.
    Only rows whose ``rel_path`` starts with ``"<basefolder>/"`` are counted.

    Args:
        basefolder: Name of the base folder as it appears at the start of
            the logged ``rel_path`` values.

    Returns:
        A dict mapping each logged ``rel_path`` to its number of log rows.
        Paths without any log row are absent from the dict.
    """
    # '%' and '_' are wildcards inside a LIKE pattern. A folder name such as
    # "a_b" would otherwise also match "aXb/...", so escape them (and the
    # escape character itself) before appending the real trailing wildcard.
    escaped = (
        basefolder.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    pattern = f"{escaped}/%"

    cursor = access_log_db.cursor()
    cursor.execute(
        "SELECT rel_path, COUNT(*) AS hit_count FROM file_access_log "
        "WHERE rel_path LIKE ? ESCAPE '\\' GROUP BY rel_path",
        (pattern,),
    )
    return {row["rel_path"]: row["hit_count"] for row in cursor.fetchall()}
|
||||
|
||||
|
||||
def log_structure(root_path, max_depth=None, show_files=False):
    """
    Write the directory tree below ``root_path`` to the log.

    The root's direct children are depth 1. With ``max_depth`` set, levels
    deeper than that are not visited; ``None`` means no limit. Directories
    are listed before other entries, each group ordered by lower-cased name.
    Directories whose name starts with '.', '@' or '#', or equals
    "transkription" (any case), are left out. Non-directory entries are
    logged only when ``show_files`` is True. Symlinks are not followed, so a
    symlinked directory is treated like a file entry.
    """
    if max_depth is None:
        depth_label = "all"
    else:
        depth_label = f"<= {max_depth}"
    log(f"Folder structure (depth {depth_label}) for '{root_path}':")

    hidden_prefixes = ('.', '@', '#')

    def _sort_key(item):
        # False sorts before True, so directories come first.
        return (not item.is_dir(follow_symlinks=False), item.name.lower())

    def _walk(path, depth):
        # Stop descending once the requested depth is exceeded.
        if max_depth is not None and depth > max_depth:
            return
        indent = " " * (depth - 1)
        try:
            with os.scandir(path) as it:
                entries = list(it)
                entries.sort(key=_sort_key)
                for entry in entries:
                    name = entry.name
                    if not entry.is_dir(follow_symlinks=False):
                        if show_files:
                            log(f"{indent}- {name}")
                        continue
                    if name.startswith(hidden_prefixes) or name.lower() in {"transkription"}:
                        continue
                    log(f"{indent}- {name}/")
                    _walk(entry.path, depth + 1)
        except PermissionError:
            # Unreadable directory: note it and carry on with the siblings.
            log(f"{indent}- [permission denied]")

    _walk(root_path, depth=1)
|
||||
|
||||
|
||||
def log_file(relative_path: str, filename: str):
    """Emit one debug log line naming an indexed file and its path."""
    line = f" file: {relative_path} ({filename})"
    log(line)
|
||||
|
||||
|
||||
def log_directory_batch(directory: str, files: list[str]):
    """Log how many files a directory contributed, without naming them.

    Nothing is logged for an empty ``files`` list. An empty ``directory``
    string is shown as '/'.
    """
    if files:
        shown_dir = directory or '/'
        log(f" Dir {shown_dir}: {len(files)} files")
|
||||
|
||||
|
||||
def updatefileindex():
|
||||
cursor = search_db.cursor()
|
||||
totals = {"folders": 0, "scanned": 0, "deleted": 0}
|
||||
|
||||
# Load folder configuration from JSON file.
|
||||
with open(FOLDER_CONFIG, "r", encoding="utf-8") as f:
|
||||
@ -81,16 +146,21 @@ def updatefileindex():
|
||||
# Process each configured base folder.
|
||||
for config in config_data:
|
||||
for folder in config.get("folders", []):
|
||||
totals["folders"] += 1
|
||||
foldername = folder.get("foldername")
|
||||
print(f"Processing folder: {foldername}")
|
||||
log(f"Processing folder: {foldername}")
|
||||
raw_folderpath = folder.get("folderpath")
|
||||
norm_folderpath = os.path.normpath(raw_folderpath)
|
||||
log_structure(norm_folderpath, max_depth=None, show_files=False)
|
||||
# Precompute the length of the base folder path (plus one for the separator)
|
||||
base_len = len(norm_folderpath) + 1
|
||||
# Prefetch hit counts for this basefolder to avoid per-file queries
|
||||
hitcount_map = get_hit_counts_for_basefolder(foldername)
|
||||
|
||||
# Accumulate scanned file data and keys for this base folder.
|
||||
scanned_files = [] # Each entry: (relative_path, basefolder, filename, filetype, transcript, hitcount)
|
||||
current_keys = set()
|
||||
dir_files = defaultdict(list) # map of directory -> list of filenames
|
||||
for entry in scan_dir(norm_folderpath):
|
||||
transcript = None
|
||||
entry_path = os.path.normpath(entry.path)
|
||||
@ -103,12 +173,8 @@ def updatefileindex():
|
||||
relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
|
||||
filetype = os.path.splitext(entry.name)[1].lower()
|
||||
|
||||
if filetype not in ['.mp3', '.wav', '.ogg', '.m4a', '.flac']:
|
||||
# Skip non-audio files.
|
||||
continue
|
||||
|
||||
# Retrieve the hit count for this file.
|
||||
hit_count = get_hit_count(relative_path)
|
||||
# Retrieve the hit count for this file from pre-fetched map.
|
||||
hit_count = hitcount_map.get(relative_path, 0)
|
||||
|
||||
# Determine the site
|
||||
if foldername == 'Gottesdienste Speyer':
|
||||
@ -134,15 +200,24 @@ def updatefileindex():
|
||||
|
||||
performance_date = hf.extract_date_from_string(relative_path)
|
||||
|
||||
# Debug: batch file logging per directory
|
||||
dir_files[os.path.dirname(relative_path)].append(entry.name)
|
||||
|
||||
scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
|
||||
current_keys.add((relative_path, entry.name))
|
||||
|
||||
# After scanning, log grouped files per directory
|
||||
for d, files in dir_files.items():
|
||||
log_directory_batch(d, files)
|
||||
|
||||
# Remove database entries for files under this base folder that are no longer on disk.
|
||||
pattern = foldername + os.sep + '%'
|
||||
cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
|
||||
db_rows = cursor.fetchall()
|
||||
keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows)
|
||||
keys_to_delete = keys_in_db - current_keys
|
||||
deleted_count = len(keys_to_delete)
|
||||
totals["deleted"] += deleted_count
|
||||
for key in keys_to_delete:
|
||||
cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key)
|
||||
|
||||
@ -154,7 +229,11 @@ def updatefileindex():
|
||||
|
||||
# Commit changes after processing this base folder.
|
||||
search_db.commit()
|
||||
folder_scanned = len(scanned_files)
|
||||
totals["scanned"] += folder_scanned
|
||||
log(f"Indexed {folder_scanned} files (deleted {deleted_count}) in '{foldername}'")
|
||||
|
||||
log(f"Index update finished: folders={totals['folders']}, files indexed={totals['scanned']}, removed={totals['deleted']}")
|
||||
return "File index updated successfully"
|
||||
|
||||
def convert_dates(search_db,
|
||||
|
||||
30
search.py
30
search.py
@ -12,12 +12,23 @@ search_db.row_factory = sqlite3.Row
|
||||
with open("app_config.json", 'r') as file:
|
||||
app_config = json.load(file)
|
||||
|
||||
FILETYPE_GROUPS = {
|
||||
'audio': ('.mp3', '.wav', '.ogg', '.m4a', '.flac'),
|
||||
'video': ('.mp4', '.mov', '.mkv', '.avi', '.webm'),
|
||||
'image': ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff')
|
||||
}
|
||||
ALL_GROUP_EXTS = tuple(sorted({ext for group in FILETYPE_GROUPS.values() for ext in group}))
|
||||
|
||||
def searchcommand():
|
||||
query = request.form.get("query", "").strip()
|
||||
category = request.form.get("category", "").strip()
|
||||
searchfolder = request.form.get("folder", "").strip()
|
||||
datefrom = request.form.get("datefrom", "").strip()
|
||||
dateto = request.form.get("dateto", "").strip()
|
||||
filetypes = [ft.strip().lower() for ft in request.form.getlist("filetype") if ft.strip()]
|
||||
if not filetypes:
|
||||
# Default to audio when nothing selected
|
||||
filetypes = ['audio']
|
||||
|
||||
include_transcript = request.form.get("includeTranscript") in ["true", "on"]
|
||||
words = [w for w in query.split() if w]
|
||||
@ -72,6 +83,25 @@ def searchcommand():
|
||||
if datefrom or dateto:
|
||||
conditions.append("performance_date IS NOT NULL")
|
||||
|
||||
# Filetype filters (multiple selection)
|
||||
selected_groups = [ft for ft in filetypes if ft in FILETYPE_GROUPS]
|
||||
include_other = 'other' in filetypes
|
||||
|
||||
# If not all groups selected, apply filter
|
||||
if set(filetypes) != {'audio', 'video', 'image', 'other'}:
|
||||
clauses = []
|
||||
if selected_groups:
|
||||
ext_list = tuple({ext for g in selected_groups for ext in FILETYPE_GROUPS[g]})
|
||||
placeholders = ",".join("?" for _ in ext_list)
|
||||
clauses.append(f"filetype IN ({placeholders})")
|
||||
params.extend(ext_list)
|
||||
if include_other:
|
||||
placeholders = ",".join("?" for _ in ALL_GROUP_EXTS)
|
||||
clauses.append(f"(filetype IS NULL OR filetype = '' OR filetype NOT IN ({placeholders}))")
|
||||
params.extend(ALL_GROUP_EXTS)
|
||||
if clauses:
|
||||
conditions.append("(" + " OR ".join(clauses) + ")")
|
||||
|
||||
# Build and execute SQL
|
||||
sql = "SELECT * FROM files"
|
||||
if conditions:
|
||||
|
||||
@ -1169,3 +1169,10 @@ footer .audio-player-container {
|
||||
color: var(--brand-ink);
|
||||
border-color: var(--brand-navy);
|
||||
}
|
||||
|
||||
/* Highlight a file when opened from search */
|
||||
.search-highlight {
|
||||
outline: 2px solid #f6c344;
|
||||
background-color: rgba(246, 195, 68, 0.25);
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
@ -5,17 +5,26 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
const resultsDiv = document.getElementById('results');
|
||||
resultsDiv.innerHTML = '<h5>Suchergebnisse:</h5>';
|
||||
|
||||
const audioExts = ['.mp3', '.wav', '.ogg', '.m4a', '.flac'];
|
||||
|
||||
if (data.results && data.results.length > 0) {
|
||||
data.results.forEach(file => {
|
||||
const card = document.createElement('div');
|
||||
const filenameWithoutExtension = file.filename.split('.').slice(0, -1).join('.');
|
||||
const parentFolder = file.relative_path.split('/').slice(0, -1).join('/');
|
||||
const transcriptURL = '/transcript/' + parentFolder + '/Transkription/' + filenameWithoutExtension + '.md';
|
||||
const isAudio = audioExts.includes((file.filetype || '').toLowerCase());
|
||||
const encodedRelPath = encodeURI(file.relative_path);
|
||||
|
||||
card.className = 'card';
|
||||
const fileAction = isAudio
|
||||
? `<button class="btn btn-light play-audio-btn" onclick="player.loadTrack('${file.relative_path}')" style="width:100%;"><i class="bi bi-volume-up"></i> ${filenameWithoutExtension}</button>`
|
||||
: `<a class="btn btn-light download-btn" href="/media/${encodedRelPath}" download style="width:100%;"><i class="bi bi-download"></i> ${file.filename}</a>`;
|
||||
|
||||
card.innerHTML = `
|
||||
<div class="card-body">
|
||||
<p><button class="btn btn-light" onclick="player.loadTrack('${file.relative_path}')" style="width:100%;"><i class="bi bi-volume-up"></i> ${filenameWithoutExtension}</button></p>
|
||||
<p><button onclick="window.open('/path/${file.relative_path}', '_self');" class="btn btn-light btn-sm" style="width:100%;"><i class="bi bi-folder"></i> ${parentFolder}</button></p>
|
||||
<p>${fileAction}</p>
|
||||
<p><button class="btn btn-light btn-sm folder-open-btn" data-folder="${parentFolder}" data-file="${file.relative_path}" style="width:100%;"><i class="bi bi-folder"></i> ${parentFolder || 'Ordner'}</button></p>
|
||||
<p class="card-text">Anzahl Downloads: ${file.hitcount}</p>
|
||||
${ file.performance_date !== undefined ? `<p class="card-text">Datum: ${file.performance_date}</p>` : ``}
|
||||
${ file.transcript_hits !== undefined ? `<p class="card-text">Treffer im Transkript: ${file.transcript_hits} <a href="#" class="show-transcript" data-url="${transcriptURL}" data-audio-url="${file.relative_path}" highlight="${file.query}"><i class="bi bi-journal-text"></i></a></p>` : ``}
|
||||
@ -24,6 +33,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
resultsDiv.appendChild(card);
|
||||
});
|
||||
attachEventListeners();
|
||||
attachSearchFolderButtons();
|
||||
} else {
|
||||
resultsDiv.innerHTML = '<p>No results found.</p>';
|
||||
}
|
||||
@ -55,6 +65,22 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
}
|
||||
}
|
||||
|
||||
// Restore previously selected filetypes (multi-select). Default to audio if none stored.
|
||||
const previousFiletypes = localStorage.getItem("searchFiletypes");
|
||||
if (previousFiletypes) {
|
||||
try {
|
||||
const list = JSON.parse(previousFiletypes);
|
||||
document.querySelectorAll('input[name=\"filetype\"]').forEach(cb => {
|
||||
cb.checked = list.includes(cb.value);
|
||||
});
|
||||
} catch (e) {
|
||||
console.error('Error parsing stored filetypes', e);
|
||||
document.getElementById('filetype-audio').checked = true;
|
||||
}
|
||||
} else {
|
||||
document.getElementById('filetype-audio').checked = true;
|
||||
}
|
||||
|
||||
// Restore the checkbox state for "Im Transkript suchen"
|
||||
const previousIncludeTranscript = localStorage.getItem("searchIncludeTranscript");
|
||||
if (previousIncludeTranscript !== null) {
|
||||
@ -74,6 +100,15 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
const categoryRadio = document.querySelector('input[name="category"]:checked');
|
||||
const category = categoryRadio ? categoryRadio.value : '';
|
||||
|
||||
// Get selected filetypes (allow multiple). Default to audio if none selected.
|
||||
const filetypeCheckboxes = document.querySelectorAll('input[name=\"filetype\"]');
|
||||
let filetypes = Array.from(filetypeCheckboxes).filter(cb => cb.checked).map(cb => cb.value);
|
||||
if (filetypes.length === 0) {
|
||||
// enforce audio as default when user unchecked all
|
||||
document.getElementById('filetype-audio').checked = true;
|
||||
filetypes = ['audio'];
|
||||
}
|
||||
|
||||
// Prevent accidental re-selection of already selected radio buttons
|
||||
const radios = document.querySelectorAll('input[name="category"]');
|
||||
radios.forEach(radio => {
|
||||
@ -98,6 +133,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
formData.append('datefrom', document.getElementById('datefrom').value);
|
||||
formData.append('dateto', document.getElementById('dateto').value);
|
||||
formData.append('includeTranscript', includeTranscript);
|
||||
filetypes.forEach(ft => formData.append('filetype', ft));
|
||||
|
||||
const settleSpinner = () => {
|
||||
clearTimeout(spinnerTimer);
|
||||
@ -121,6 +157,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
// Save the search word, selected category, and checkbox state in localStorage
|
||||
localStorage.setItem("searchQuery", query);
|
||||
localStorage.setItem("searchCategory", category);
|
||||
localStorage.setItem("searchFiletypes", JSON.stringify(filetypes));
|
||||
localStorage.setItem("searchIncludeTranscript", includeTranscript);
|
||||
})
|
||||
.catch(error => {
|
||||
@ -140,6 +177,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
localStorage.removeItem("searchResponse");
|
||||
localStorage.removeItem("searchQuery");
|
||||
localStorage.removeItem("searchCategory");
|
||||
localStorage.removeItem("searchFiletypes");
|
||||
localStorage.removeItem("folder");
|
||||
localStorage.removeItem("datefrom");
|
||||
localStorage.removeItem("dateto");
|
||||
@ -149,6 +187,9 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
document.querySelector('input[name="category"][value=""]').checked = true;
|
||||
const otherRadios = document.querySelectorAll('input[name="category"]:not([value=""])');
|
||||
otherRadios.forEach(radio => radio.checked = false);
|
||||
document.getElementById('filetype-audio').checked = true;
|
||||
const otherFiletypeBoxes = document.querySelectorAll('input[name=\"filetype\"]:not([value=\"audio\"])');
|
||||
otherFiletypeBoxes.forEach(cb => cb.checked = false);
|
||||
document.getElementById('folder').value = ''; // Reset to "Alle"
|
||||
document.getElementById('datefrom').value = ''; // Reset date from
|
||||
document.getElementById('dateto').value = ''; // Reset date to
|
||||
@ -164,6 +205,30 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Wire up every ".folder-open-btn" currently in the document so that a click
 * opens the folder named in its data-folder attribute and highlights the file
 * named in its data-file attribute.
 */
function attachSearchFolderButtons() {
    const buttons = document.querySelectorAll('.folder-open-btn');
    for (const button of buttons) {
        button.addEventListener('click', (event) => {
            event.preventDefault();
            const { folder, file } = button.dataset;
            openFolderAndHighlight(folder, file);
        });
    }
}
|
||||
|
||||
/**
 * Switch to the main view, load a folder and highlight one file inside it.
 *
 * @param {string} folderPath - Folder to load; a falsy value loads ''.
 * @param {string} filePath - Value of the data-url attribute on the
 *     ".play-file" element to highlight and scroll into view.
 *
 * If no matching element exists after loading, nothing is highlighted.
 * Relies on loadDirectory() returning a promise.
 */
function openFolderAndHighlight(folderPath, filePath) {
    const targetFolder = folderPath || '';
    // Switch back to main view before loading folder
    viewMain();
    loadDirectory(targetFolder).then(() => {
        // File paths may contain quotes or backslashes; inserted raw they make
        // the selector invalid and querySelector throws. Escape the value first.
        const escapedPath = CSS.escape(String(filePath));
        const target = document.querySelector(`.play-file[data-url="${escapedPath}"]`);
        if (target) {
            target.classList.add('search-highlight');
            target.scrollIntoView({ behavior: 'smooth', block: 'center' });
        }
    });
}
|
||||
|
||||
function syncThemeColor() {
|
||||
// read the CSS variable from :root (or any selector)
|
||||
const cssVar = getComputedStyle(document.documentElement)
|
||||
|
||||
@ -126,6 +126,32 @@
|
||||
|
||||
<hr>
|
||||
|
||||
<!-- Dateityp -->
|
||||
<div class="mb-3">
|
||||
<label class="form-label">Dateityp:</label>
|
||||
<div class="d-flex flex-wrap gap-3">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" name="filetype" id="filetype-audio" value="audio" checked>
|
||||
<label class="form-check-label" for="filetype-audio">Audio</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" name="filetype" id="filetype-video" value="video">
|
||||
<label class="form-check-label" for="filetype-video">Video</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" name="filetype" id="filetype-image" value="image">
|
||||
<label class="form-check-label" for="filetype-image">Bild</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" name="filetype" id="filetype-other" value="other">
|
||||
<label class="form-check-label" for="filetype-other">Sonstige</label>
|
||||
</div>
|
||||
</div>
|
||||
<small class="text-muted">Mehrfachauswahl möglich!</small>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
<!-- Transkript durchsuchen -->
|
||||
<div class="form-check mb-3">
|
||||
<input type="checkbox" class="form-check-input" id="includeTranscript" name="includeTranscript">
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user