add search

parent fd83b89e6d
commit 27f96fff6e

app.py (24 changed lines)
@@ -16,6 +16,7 @@ from urllib.parse import urlparse, unquote
from werkzeug.middleware.proxy_fix import ProxyFix
import re

import search
import auth
import analytics as a

@@ -37,6 +38,9 @@ app.add_url_rule('/dashboard', view_func=a.dashboard)
app.add_url_rule('/connections', view_func=a.connections)
app.add_url_rule('/mylinks', view_func=auth.mylinks)
app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'])
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])


# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
@@ -181,17 +185,33 @@ def api_browse(subpath):
    base_path = session['folders'][root]
    directory = os.path.join(base_path, *relative_parts)

    playfile = None
    # Check if the constructed directory exists.
    if not os.path.isdir(directory):
        # Assume the last segment is a filename; remove it.
        if relative_parts:
            playfile = relative_parts.pop()  # Get the filename.
            directory = os.path.join(base_path, *relative_parts)
            # Rebuild subpath to reflect the directory (without the file).
            subpath = '/'.join([root] + relative_parts)
        # If the parent directory still doesn't exist, return error.
        if not os.path.isdir(directory):
            return jsonify({'error': 'Directory not found'}), 404

    directories, files = list_directory_contents(directory, subpath)
    breadcrumbs = generate_breadcrumbs(subpath)

    response = {
        'breadcrumbs': breadcrumbs,
        'directories': directories,
        'files': files
    }

    # If a filename was selected include it.
    if playfile:
        response['playfile'] = os.path.join(subpath, playfile).replace(os.sep, '/')

    return jsonify(response)


@app.route("/media/<path:subpath>")
@auth.require_secret
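For illustration only, a minimal client-side sketch of the new playfile behaviour in api_browse. The route prefix, host, port, and folder/file names below are assumptions rather than values taken from this diff, the `requests` package is an extra dependency, and any secret-based auth the app enforces is ignored here.

import requests  # assumed to be installed; not part of this commit

# Hypothetical example: browse to a path whose last segment is a file.
resp = requests.get("http://localhost:5000/api/browse/Music/Album/track01.mp3")
data = resp.json()

print(data["breadcrumbs"], data["directories"], data["files"])
# When the last path segment was a file, the response also names it,
# so the front end can start playback automatically.
print(data.get("playfile"))  # e.g. "Music/Album/track01.mp3"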
index_for_search.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import os
import json
import sqlite3

SEARCH_DB_NAME = 'search.db'

search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row

def init_db():
    """Initializes the database with the required schema."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()

def scan_dir(directory):
    """Recursively scan directories using os.scandir for improved performance."""
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Skip transcription directories immediately.
                    if entry.name.lower() == "transkription":
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        return

def updatefileindex():
    cursor = search_db.cursor()

    # Load folder configuration from JSON file.
    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)

    # Process each configured base folder.
    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            norm_folderpath = os.path.normpath(raw_folderpath)
            # Precompute the length of the base folder path (plus one for the separator)
            base_len = len(norm_folderpath) + 1

            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
            current_keys = set()

            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get relative part by slicing if possible.
                if entry_path.startswith(norm_folderpath):
                    rel_part = entry_path[base_len:]
                else:
                    rel_part = os.path.relpath(entry_path, norm_folderpath)
                # Prepend the foldername so it becomes part of the stored relative path.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                print(relative_path)
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = None

                # Check for a corresponding transcript file in a sibling "Transkription" folder.
                parent_dir = os.path.dirname(entry_path)
                transcript_dir = os.path.join(parent_dir, "Transkription")
                transcript_filename = os.path.splitext(entry.name)[0] + ".md"
                transcript_path = os.path.join(transcript_dir, transcript_filename)
                if os.path.exists(transcript_path):
                    try:
                        with open(transcript_path, 'r', encoding='utf-8') as tf:
                            transcript = tf.read()
                    except Exception:
                        transcript = None

                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))

            # Remove database entries for files under this base folder that are no longer on disk.
            pattern = foldername + '/%'  # relative_path is stored with '/' separators, so match on '/' rather than os.sep
cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
|
||||
db_rows = cursor.fetchall()
|
||||
keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows)
|
||||
keys_to_delete = keys_in_db - current_keys
|
||||
for key in keys_to_delete:
|
||||
cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key)
|
||||
|
||||
# Bulk write the scanned files using INSERT OR REPLACE.
|
||||
cursor.executemany(
|
||||
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
|
||||
scanned_files
|
||||
)
|
||||
|
||||
# Commit changes after processing this base folder.
|
||||
search_db.commit()
|
||||
|
||||
return "File index updated successfully"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_db() # Initialize the database schema if it doesn't exist
|
||||
updatefileindex() # Update the file index
|
||||
search_db.close() # Close the database connection
|
||||
print("Database connection closed.")
|
||||
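As a quick sanity check after running the indexer, a minimal sketch (assuming search.db sits in the working directory and was built with the schema above) that lists a few indexed rows:

import sqlite3

# Spot-check the index built by index_for_search.py.
db = sqlite3.connect('search.db')
db.row_factory = sqlite3.Row
rows = db.execute(
    "SELECT relative_path, filename, filetype, "
    "transcript IS NOT NULL AS has_transcript FROM files LIMIT 5"
).fetchall()
for row in rows:
    print(dict(row))
db.close()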
search.py (new file, 58 lines)
@@ -0,0 +1,58 @@
import sqlite3
from flask import Flask, render_template, request, jsonify
app = Flask(__name__)

SEARCH_DB_NAME = 'search.db'

search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row

def searchcommand():
    query = request.form.get("query", "").strip()
    include_transcript = request.form.get("includeTranscript") == "true" or request.form.get("includeTranscript") == "on"
    words = [w for w in query.split() if w]
    cursor = search_db.cursor()

    if not include_transcript:
        # Simple search: all words must be in either relative_path or filename.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = [dict(row) for row in raw_results]
    else:
        # Advanced search: include transcript. Count transcript hits.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ? OR transcript LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()

        results = []
        for row in raw_results:
            result = dict(row)
            transcript = result.get("transcript") or ""
            total_hits = sum(transcript.lower().count(word.lower()) for word in words)
            result["transcript_hits"] = total_hits
            result["transcript"] = None  # Remove full transcript if needed.
            results.append(result)
        # Sort results so files with more transcript hits are on top.
        results.sort(key=lambda x: x["transcript_hits"], reverse=True)

    return jsonify(results=results)

def search():
    return render_template('search.html')
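For reference, a minimal sketch of calling the /searchcommand view registered in app.py. The host and port are placeholders, the `requests` package is an assumed extra dependency, and any secret-based auth the app enforces is ignored here.

import requests  # assumed extra dependency, not part of this commit

resp = requests.post(
    "http://localhost:5000/searchcommand",  # host and port are placeholders
    data={"query": "Predigt 2024", "includeTranscript": "true"},
)
for hit in resp.json()["results"]:
    # transcript_hits is only present when includeTranscript was requested
    print(hit["relative_path"], hit["filename"], hit.get("transcript_hits"))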
@@ -23,7 +23,11 @@ function paintFile() {
    if (currentMusicFile) {
        const currentMusicFileElement = document.querySelector(`.play-file[data-url="${currentMusicFile.path}"]`);
        if (currentMusicFileElement) {
            const fileItem = currentMusicFileElement.closest('.file-item');
            fileItem.classList.add('currently-playing');
            // setTimeout(() => {
            //     fileItem.scrollIntoView({ block: "center", inline: "nearest" });
            // }, 300);
        }
    }
}
@@ -144,6 +148,12 @@ function loadDirectory(subpath) {
        .then(response => response.json())
        .then(data => {
            renderContent(data);
            if (data.playfile) {
                const playFileLink = document.querySelector(`.play-file[data-url="${data.playfile}"]`);
                if (playFileLink) {
                    playFileLink.click();
                }
            }
            paintFile();
            return data; // return data for further chaining
        })
templates/search.html (new file, 104 lines)
@@ -0,0 +1,104 @@
<!doctype html>
<html>
<head>
    <meta charset="utf-8">

    <meta property="og:title" content="Gottesdienste Speyer und Schwegenheim" />
    <meta property="og:description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft." />
    <meta property="og:image" content="https://app.bethaus-speyer.de/static/icons/logo-200x200.png" />
    <meta property="og:url" content="https://app.bethaus-speyer.de" />

    <title>Dateisuche</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
    <meta name="description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft.">
    <meta name="author" content="Bethaus Speyer">
    <link rel="icon" href="/static/icons/logo-192x192.png" type="image/png" sizes="192x192">

    <!-- Bootstrap CSS for modern styling -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="{{ url_for('static', filename='app.css') }}">
    <style>
        body {
            background-color: #f8f9fa;
        }
        .search-container {
            margin-top: 50px;
        }
        .card {
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
    <header class="site-header">
        <a href="#">
            <img src="/static/logoW.png" alt="Logo" class="logo">
        </a>
        <h1>Suche</h1>
    </header>
    <div class="container search-container">
        <form id="searchForm" method="post" class="mb-4">
            <div class="mb-3">
                <label for="query" class="form-label">Suchwörter:</label>
                <input type="text" id="query" name="query" class="form-control" required>
            </div>
            <div class="form-check mb-3">
                <input type="checkbox" class="form-check-input" id="includeTranscript" name="includeTranscript">
                <label class="form-check-label" for="includeTranscript">Im Transkript suchen</label>
            </div>
            <button type="submit" class="btn btn-primary">Suchen</button>
        </form>

        <!-- Container for AJAX-loaded results -->
        <div id="results"></div>
    </div>

    <script>
        document.getElementById('searchForm').addEventListener('submit', function(e) {
            e.preventDefault();
            const query = document.getElementById('query').value.trim();
            const includeTranscript = document.getElementById('includeTranscript').checked;

            // Prepare form data
            const formData = new FormData();
            formData.append('query', query);
            formData.append('includeTranscript', includeTranscript);

            fetch('/searchcommand', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                const resultsDiv = document.getElementById('results');
                resultsDiv.innerHTML = ''; // Clear previous results
                if (data.results && data.results.length > 0) {
                    data.results.forEach(file => {
                        // Create a card element for each result
                        const card = document.createElement('div');
                        card.className = 'card';
                        card.innerHTML = `
                            <div class="card-body">
                                <h5 class="card-title">
                                    <a href="/path/${file.relative_path}" target="_blank">${file.filename}</a>
                                </h5>
                                <h6 class="card-subtitle mb-2 text-muted">${file.relative_path}</h6>
                                ${file.transcript_hits !== undefined ? `<p class="card-text">Treffer im Transkript: ${file.transcript_hits}</p>` : ''}
                            </div>
                        `;
                        resultsDiv.appendChild(card);
                    });
                } else {
                    resultsDiv.innerHTML = '<p>No results found.</p>';
                }
            })
            .catch(error => {
                console.error('Error:', error);
            });
        });
    </script>

    <!-- Bootstrap Bundle with Popper -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>