add search

lelo 2025-04-05 00:52:32 +02:00
parent fd83b89e6d
commit 27f96fff6e
6 changed files with 316 additions and 11 deletions

app.py (24 changed lines)

@@ -16,6 +16,7 @@ from urllib.parse import urlparse, unquote
from werkzeug.middleware.proxy_fix import ProxyFix
import re
import search
import auth
import analytics as a
@@ -37,6 +38,9 @@ app.add_url_rule('/dashboard', view_func=a.dashboard)
app.add_url_rule('/connections', view_func=a.connections)
app.add_url_rule('/mylinks', view_func=auth.mylinks)
app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'])
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])

# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
@@ -181,17 +185,33 @@ def api_browse(subpath):
    base_path = session['folders'][root]
    directory = os.path.join(base_path, *relative_parts)

    playfile = None
    # Check if the constructed directory exists.
    if not os.path.isdir(directory):
        # Assume the last segment is a filename; remove it.
        if relative_parts:
            playfile = relative_parts.pop()  # Get the filename.
            directory = os.path.join(base_path, *relative_parts)
            # Rebuild subpath to reflect the directory (without the file).
            subpath = '/'.join([root] + relative_parts)

    # If the parent directory still doesn't exist, return error.
    if not os.path.isdir(directory):
        return jsonify({'error': 'Directory not found'}), 404

    directories, files = list_directory_contents(directory, subpath)
    breadcrumbs = generate_breadcrumbs(subpath)

    response = {
        'breadcrumbs': breadcrumbs,
        'directories': directories,
        'files': files
    }
    # If a filename was selected include it.
    if playfile:
        response['playfile'] = os.path.join(subpath, playfile).replace(os.sep, '/')
    return jsonify(response)


@app.route("/media/<path:subpath>")
@auth.require_secret
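For reference, the JSON shape returned by the reworked api_browse can be sketched as below. Only the key names come from the code above; the per-entry formats are produced by generate_breadcrumbs and list_directory_contents, which are outside this diff, so the values here are placeholders.

# Rough sketch of an /api/browse response; 'playfile' only appears when the requested
# subpath named a file instead of a directory (the example path is invented).
{
    "breadcrumbs": [...],                  # built by generate_breadcrumbs(subpath)
    "directories": [...],                  # built by list_directory_contents(directory, subpath)
    "files": [...],
    "playfile": "Musik/Album/track01.mp3"  # forward-slash path, as normalized above
}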

index_for_search.py (new file, 113 lines)

@@ -0,0 +1,113 @@
import os
import json
import sqlite3

SEARCH_DB_NAME = 'search.db'
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row


def init_db():
    """Initializes the database with the required schema."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()


def scan_dir(directory):
    """Recursively scan directories using os.scandir for improved performance."""
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Skip transcription directories immediately.
                    if entry.name.lower() == "transkription":
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        return


def updatefileindex():
    cursor = search_db.cursor()
    # Load folder configuration from JSON file.
    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)
    # Process each configured base folder.
    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            norm_folderpath = os.path.normpath(raw_folderpath)
            # Precompute the length of the base folder path (plus one for the separator).
            base_len = len(norm_folderpath) + 1
            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
            current_keys = set()
            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get relative part by slicing if possible.
                if entry_path.startswith(norm_folderpath):
                    rel_part = entry_path[base_len:]
                else:
                    rel_part = os.path.relpath(entry_path, norm_folderpath)
                # Prepend the foldername so it becomes part of the stored relative path.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                print(relative_path)
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = None
                # Check for a corresponding transcript file in a sibling "Transkription" folder.
                parent_dir = os.path.dirname(entry_path)
                transcript_dir = os.path.join(parent_dir, "Transkription")
                transcript_filename = os.path.splitext(entry.name)[0] + ".md"
                transcript_path = os.path.join(transcript_dir, transcript_filename)
                if os.path.exists(transcript_path):
                    try:
                        with open(transcript_path, 'r', encoding='utf-8') as tf:
                            transcript = tf.read()
                    except Exception:
                        transcript = None
                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))
            # Remove database entries for files under this base folder that are no longer on disk.
            # relative_path is stored with forward slashes, so build the LIKE pattern with '/' rather than os.sep.
            pattern = foldername + '/%'
cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
db_rows = cursor.fetchall()
keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows)
keys_to_delete = keys_in_db - current_keys
for key in keys_to_delete:
cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key)
# Bulk write the scanned files using INSERT OR REPLACE.
cursor.executemany(
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
scanned_files
)
# Commit changes after processing this base folder.
search_db.commit()
return "File index updated successfully"
if __name__ == "__main__":
init_db() # Initialize the database schema if it doesn't exist
updatefileindex() # Update the file index
search_db.close() # Close the database connection
print("Database connection closed.")

search.db (new binary file, not shown)

search.py (new file, 58 lines)

@@ -0,0 +1,58 @@
import sqlite3
from flask import Flask, render_template, request, jsonify

app = Flask(__name__)

SEARCH_DB_NAME = 'search.db'
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row
def searchcommand():
    query = request.form.get("query", "").strip()
    include_transcript = request.form.get("includeTranscript") == "true" or request.form.get("includeTranscript") == "on"
    words = [w for w in query.split() if w]
    cursor = search_db.cursor()

    if not include_transcript:
        # Simple search: all words must be in either relative_path or filename.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = [dict(row) for row in raw_results]
    else:
        # Advanced search: include transcript. Count transcript hits.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ? OR transcript LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = []
        for row in raw_results:
            result = dict(row)
            transcript = result.get("transcript") or ""
            total_hits = sum(transcript.lower().count(word.lower()) for word in words)
            result["transcript_hits"] = total_hits
            result["transcript"] = None  # Remove full transcript if needed.
            results.append(result)
        # Sort results so files with more transcript hits are on top.
        results.sort(key=lambda x: x["transcript_hits"], reverse=True)
    return jsonify(results=results)


def search():
    return render_template('search.html')
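To try the endpoint by hand, a small client along these lines should work; it assumes the app is reachable at http://localhost:5000 and that /searchcommand requires no extra auth secret, and the search words are made up for illustration.

# Minimal client sketch for POSTing a search to /searchcommand (URL and query are assumptions).
import json
import urllib.parse
import urllib.request

form = urllib.parse.urlencode({
    "query": "predigt 2024",       # hypothetical search words
    "includeTranscript": "true",   # matches the string check in searchcommand()
}).encode("utf-8")

with urllib.request.urlopen("http://localhost:5000/searchcommand", data=form) as resp:
    results = json.load(resp)["results"]

for hit in results:
    print(hit["relative_path"], hit["filename"], hit.get("transcript_hits"))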

(JavaScript file, filename not shown in this view)

@@ -23,11 +23,15 @@ function paintFile() {
    if (currentMusicFile) {
        const currentMusicFileElement = document.querySelector(`.play-file[data-url="${currentMusicFile.path}"]`);
        if (currentMusicFileElement) {
            const fileItem = currentMusicFileElement.closest('.file-item');
            fileItem.classList.add('currently-playing');
            // setTimeout(() => {
            //     fileItem.scrollIntoView({ block: "center", inline: "nearest" });
            // }, 300);
        }
    }
}
}

function renderContent(data) {
@@ -144,6 +148,12 @@ function loadDirectory(subpath) {
        .then(response => response.json())
        .then(data => {
            renderContent(data);
            if (data.playfile) {
                const playFileLink = document.querySelector(`.play-file[data-url="${data.playfile}"]`);
                if (playFileLink) {
                    playFileLink.click();
                }
            }
            paintFile();
            return data; // return data for further chaining
        })

templates/search.html (new file, 104 lines)

@@ -0,0 +1,104 @@
<!doctype html>
<html>
<head>
    <meta charset="utf-8">
    <meta property="og:title" content="Gottesdienste Speyer und Schwegenheim" />
    <meta property="og:description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft." />
    <meta property="og:image" content="https://app.bethaus-speyer.de/static/icons/logo-200x200.png" />
    <meta property="og:url" content="https://app.bethaus-speyer.de" />
    <title>Dateisuche</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
    <meta name="description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft.">
    <meta name="author" content="Bethaus Speyer">
    <link rel="icon" href="/static/icons/logo-192x192.png" type="image/png" sizes="192x192">
    <!-- Bootstrap CSS for modern styling -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="{{ url_for('static', filename='app.css') }}">
    <style>
        body {
            background-color: #f8f9fa;
        }
        .search-container {
            margin-top: 50px;
        }
        .card {
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
    <header class="site-header">
        <a href="#">
            <img src="/static/logoW.png" alt="Logo" class="logo">
        </a>
        <h1>Suche</h1>
    </header>
    <div class="container search-container">
        <form id="searchForm" method="post" class="mb-4">
            <div class="mb-3">
                <label for="query" class="form-label">Suchwörter:</label>
                <input type="text" id="query" name="query" class="form-control" required>
            </div>
            <div class="form-check mb-3">
                <input type="checkbox" class="form-check-input" id="includeTranscript" name="includeTranscript">
                <label class="form-check-label" for="includeTranscript">Im Transkript suchen</label>
            </div>
            <button type="submit" class="btn btn-primary">Suchen</button>
        </form>
        <!-- Container for AJAX-loaded results -->
        <div id="results"></div>
    </div>
    <script>
        document.getElementById('searchForm').addEventListener('submit', function(e) {
            e.preventDefault();
            const query = document.getElementById('query').value.trim();
            const includeTranscript = document.getElementById('includeTranscript').checked;
            // Prepare form data
            const formData = new FormData();
            formData.append('query', query);
            formData.append('includeTranscript', includeTranscript);
            fetch('/searchcommand', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                const resultsDiv = document.getElementById('results');
                resultsDiv.innerHTML = ''; // Clear previous results
                if (data.results && data.results.length > 0) {
                    data.results.forEach(file => {
                        // Create a card element for each result
                        const card = document.createElement('div');
                        card.className = 'card';
                        card.innerHTML = `
                            <div class="card-body">
                                <h5 class="card-title">
                                    <a href="/path/${file.relative_path}" target="_blank">${file.filename}</a>
                                </h5>
                                <h6 class="card-subtitle mb-2 text-muted">${file.relative_path}</h6>
                                ${file.transcript_hits !== undefined ? `<p class="card-text">Treffer im Transkript: ${file.transcript_hits}</p>` : ''}
                            </div>
                        `;
                        resultsDiv.appendChild(card);
                    });
                } else {
                    resultsDiv.innerHTML = '<p>No results found.</p>';
                }
            })
            .catch(error => {
                console.error('Error:', error);
            });
        });
    </script>
    <!-- Bootstrap Bundle with Popper -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>