diff --git a/app.py b/app.py index f439bfc..1ab4b54 100755 --- a/app.py +++ b/app.py @@ -16,6 +16,7 @@ from urllib.parse import urlparse, unquote from werkzeug.middleware.proxy_fix import ProxyFix import re +import search import auth import analytics as a @@ -37,6 +38,9 @@ app.add_url_rule('/dashboard', view_func=a.dashboard) app.add_url_rule('/connections', view_func=a.connections) app.add_url_rule('/mylinks', view_func=auth.mylinks) app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST']) +app.add_url_rule('/search', view_func=search.search, methods=['GET']) +app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST']) + # Grab the HOST_RULE environment variable host_rule = os.getenv("HOST_RULE", "") @@ -181,17 +185,33 @@ def api_browse(subpath): base_path = session['folders'][root] directory = os.path.join(base_path, *relative_parts) + playfile = None + # Check if the constructed directory exists. if not os.path.isdir(directory): - return jsonify({'error': 'Directory not found'}), 404 + # Assume the last segment is a filename; remove it. + if relative_parts: + playfile = relative_parts.pop() # Get the filename. + directory = os.path.join(base_path, *relative_parts) + # Rebuild subpath to reflect the directory (without the file). + subpath = '/'.join([root] + relative_parts) + # If the parent directory still doesn't exist, return error. + if not os.path.isdir(directory): + return jsonify({'error': 'Directory not found'}), 404 directories, files = list_directory_contents(directory, subpath) breadcrumbs = generate_breadcrumbs(subpath) - return jsonify({ + response = { 'breadcrumbs': breadcrumbs, 'directories': directories, 'files': files - }) + } + + # If a filename was selected include it. 
"""Build and refresh the SQLite file index that backs the search page.

Run as a script to (re)index every folder listed in ``folder_config.json``.
"""
import json
import os
import sqlite3

SEARCH_DB_NAME = 'search.db'

# Directory (next to the media files) that holds the Markdown transcripts.
TRANSCRIPT_DIRNAME = "Transkription"

# Escape character used in LIKE patterns built from folder names.
_LIKE_ESCAPE = "\\"

search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row


def init_db():
    """Create the ``files`` table if it does not exist yet."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()


def scan_dir(directory):
    """Yield an ``os.DirEntry`` for every regular file below *directory*.

    Symlinks are not followed, directories named "transkription"
    (case-insensitive) are skipped, and directories that raise
    ``PermissionError`` are silently ignored.
    """
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Transcripts are attached to their media file, not indexed
                    # as files of their own.
                    if entry.name.lower() == TRANSCRIPT_DIRNAME.lower():
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        return


def _escape_like(text):
    """Return *text* with LIKE wildcards escaped for ``ESCAPE '\\'``."""
    return (
        text.replace(_LIKE_ESCAPE, _LIKE_ESCAPE * 2)
        .replace("%", _LIKE_ESCAPE + "%")
        .replace("_", _LIKE_ESCAPE + "_")
    )


def _read_transcript(entry_path, filename):
    """Return the text of the sibling transcript for a file, or ``None``.

    The transcript is expected at ``<dir>/Transkription/<stem>.md``. A missing
    or unreadable transcript is not an error; the file is indexed without one.
    """
    transcript_path = os.path.join(
        os.path.dirname(entry_path),
        TRANSCRIPT_DIRNAME,
        os.path.splitext(filename)[0] + ".md",
    )
    try:
        with open(transcript_path, 'r', encoding='utf-8') as tf:
            return tf.read()
    except (OSError, UnicodeDecodeError):
        return None


def updatefileindex():
    """Synchronise the ``files`` table with the folders in ``folder_config.json``.

    For every configured folder the directory tree is scanned, rows for files
    that no longer exist are deleted, and all files found are written with
    ``INSERT OR REPLACE``. Changes are committed once per configured folder.

    Returns:
        The status string ``"File index updated successfully"``.

    Raises:
        OSError: if ``folder_config.json`` or a configured base folder cannot
            be opened (other than for missing permissions).
        json.JSONDecodeError: if the configuration file is not valid JSON.
    """
    cursor = search_db.cursor()

    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)

    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            if not foldername or not raw_folderpath:
                # Incomplete entry: nothing to scan and no prefix to clean up.
                continue

            base = os.path.normpath(raw_folderpath)
            # A filesystem root already ends with the separator; adding a
            # second one would cut the first character off every file name.
            base_prefix = base if base.endswith(os.sep) else base + os.sep

            scanned_files = []  # (relative_path, filename, filetype, transcript)
            current_keys = set()

            for entry in scan_dir(base):
                entry_path = os.path.normpath(entry.path)
                if entry_path.startswith(base_prefix):
                    rel_part = entry_path[len(base_prefix):]
                else:
                    rel_part = os.path.relpath(entry_path, base)
                # Stored paths always use '/', with the folder name as first segment.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = _read_transcript(entry_path, entry.name)

                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))

            # Prefix exactly as it appears in stored paths ('/' on every OS).
            db_prefix = os.path.join(foldername, '').replace(os.sep, '/')
            cursor.execute(
                "SELECT relative_path, filename FROM files "
                "WHERE relative_path LIKE ? ESCAPE '\\'",
                (_escape_like(db_prefix) + '%',),
            )
            # LIKE is ASCII case-insensitive, so confirm the prefix exactly
            # before treating a row as belonging to this folder.
            stale_keys = [
                (row["relative_path"], row["filename"])
                for row in cursor.fetchall()
                if row["relative_path"].startswith(db_prefix)
                and (row["relative_path"], row["filename"]) not in current_keys
            ]
            cursor.executemany(
                "DELETE FROM files WHERE relative_path = ? AND filename = ?",
                stale_keys,
            )

            cursor.executemany(
                "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
                scanned_files
            )

            # Commit per base folder so a later failure keeps earlier work.
            search_db.commit()

    return "File index updated successfully"


if __name__ == "__main__":
    init_db()  # Initialize the database schema if it doesn't exist
    updatefileindex()  # Update the file index
    search_db.close()  # Close the database connection
    print("Database connection closed.")
/**
 * Mark the list entry of the currently playing track.
 *
 * Looks up `currentTrackPath` in `currentMusicFiles`, finds the matching
 * `.play-file` link by its `data-url` attribute and adds the
 * `currently-playing` class to the enclosing `.file-item`. Does nothing when
 * no track is playing or the track is not part of the rendered listing.
 */
function paintFile() {
  if (!currentTrackPath) {
    return;
  }

  const currentMusicFile = currentMusicFiles.find(file => file.path === currentTrackPath);
  if (!currentMusicFile) {
    return;
  }

  // File names may contain quotes or backslashes; unescaped they would make
  // the selector invalid (querySelector throws) or match the wrong element.
  const selector = `.play-file[data-url="${CSS.escape(currentMusicFile.path)}"]`;
  const playLink = document.querySelector(selector);
  const fileItem = playLink ? playLink.closest('.file-item') : null;
  if (fileItem) {
    fileItem.classList.add('currently-playing');
  }
}
function loadDirectory(subpath) { .then(response => response.json()) .then(data => { renderContent(data); + if (data.playfile) { + const playFileLink = document.querySelector(`.play-file[data-url="${data.playfile}"]`); + if (playFileLink) { + playFileLink.click(); + } + } paintFile(); return data; // return data for further chaining }) diff --git a/templates/search.html b/templates/search.html new file mode 100644 index 0000000..1bf8713 --- /dev/null +++ b/templates/search.html @@ -0,0 +1,104 @@ + + + + + + + + + + + Dateisuche + + + + + + + + + + + + +
+
+
+ + +
+
+ + +
+ +
+ + +
+
+ + + + + + +