add search

parent fd83b89e6d
commit 27f96fff6e

app.py (24 changed lines)
@@ -16,6 +16,7 @@ from urllib.parse import urlparse, unquote
from werkzeug.middleware.proxy_fix import ProxyFix
import re

import search
import auth
import analytics as a

@@ -37,6 +38,9 @@ app.add_url_rule('/dashboard', view_func=a.dashboard)
app.add_url_rule('/connections', view_func=a.connections)
app.add_url_rule('/mylinks', view_func=auth.mylinks)
app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'])
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])


# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
@@ -181,17 +185,33 @@ def api_browse(subpath):
    base_path = session['folders'][root]
    directory = os.path.join(base_path, *relative_parts)

    playfile = None
    # Check if the constructed directory exists.
    if not os.path.isdir(directory):
        # Assume the last segment is a filename; remove it.
        if relative_parts:
            playfile = relative_parts.pop()  # Get the filename.
            directory = os.path.join(base_path, *relative_parts)
            # Rebuild subpath to reflect the directory (without the file).
            subpath = '/'.join([root] + relative_parts)
        # If the parent directory still doesn't exist, return error.
        if not os.path.isdir(directory):
            return jsonify({'error': 'Directory not found'}), 404

    directories, files = list_directory_contents(directory, subpath)
    breadcrumbs = generate_breadcrumbs(subpath)

    response = {
        'breadcrumbs': breadcrumbs,
        'directories': directories,
        'files': files
    }

    # If a filename was selected include it.
    if playfile:
        response['playfile'] = os.path.join(subpath, playfile).replace(os.sep, '/')

    return jsonify(response)


@app.route("/media/<path:subpath>")
@auth.require_secret
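For illustration only, a minimal client-side sketch of the new playfile behaviour in api_browse. The route prefix, host, port, and folder/file names below are assumptions rather than values taken from this diff, the `requests` package is an extra dependency, and any secret-based auth the app enforces is ignored here.

import requests  # assumed to be installed; not part of this commit

# Hypothetical example: browse to a path whose last segment is a file.
resp = requests.get("http://localhost:5000/api/browse/Music/Album/track01.mp3")
data = resp.json()

print(data["breadcrumbs"], data["directories"], data["files"])
# When the last path segment was a file, the response also names it,
# so the front end can start playback automatically.
print(data.get("playfile"))  # e.g. "Music/Album/track01.mp3"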
index_for_search.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import os
import json
import sqlite3

SEARCH_DB_NAME = 'search.db'

search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row

def init_db():
    """Initializes the database with the required schema."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()

def scan_dir(directory):
    """Recursively scan directories using os.scandir for improved performance."""
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Skip transcription directories immediately.
                    if entry.name.lower() == "transkription":
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        return

def updatefileindex():
    cursor = search_db.cursor()

    # Load folder configuration from JSON file.
    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)

    # Process each configured base folder.
    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            norm_folderpath = os.path.normpath(raw_folderpath)
            # Precompute the length of the base folder path (plus one for the separator)
            base_len = len(norm_folderpath) + 1

            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
            current_keys = set()

            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get relative part by slicing if possible.
                if entry_path.startswith(norm_folderpath):
                    rel_part = entry_path[base_len:]
                else:
                    rel_part = os.path.relpath(entry_path, norm_folderpath)
                # Prepend the foldername so it becomes part of the stored relative path.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                print(relative_path)
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = None

                # Check for a corresponding transcript file in a sibling "Transkription" folder.
                parent_dir = os.path.dirname(entry_path)
                transcript_dir = os.path.join(parent_dir, "Transkription")
                transcript_filename = os.path.splitext(entry.name)[0] + ".md"
                transcript_path = os.path.join(transcript_dir, transcript_filename)
                if os.path.exists(transcript_path):
                    try:
                        with open(transcript_path, 'r', encoding='utf-8') as tf:
                            transcript = tf.read()
                    except Exception:
                        transcript = None

                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))

            # Remove database entries for files under this base folder that are no longer on disk.
            pattern = foldername + '/%'  # relative_path is stored with '/' separators, so match on '/' rather than os.sep
cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
|
||||
db_rows = cursor.fetchall()
|
||||
keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows)
|
||||
keys_to_delete = keys_in_db - current_keys
|
||||
for key in keys_to_delete:
|
||||
cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key)
|
||||
|
||||
# Bulk write the scanned files using INSERT OR REPLACE.
|
||||
cursor.executemany(
|
||||
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
|
||||
scanned_files
|
||||
)
|
||||
|
||||
# Commit changes after processing this base folder.
|
||||
search_db.commit()
|
||||
|
||||
return "File index updated successfully"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_db() # Initialize the database schema if it doesn't exist
|
||||
updatefileindex() # Update the file index
|
||||
search_db.close() # Close the database connection
|
||||
print("Database connection closed.")
|
||||
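As a quick sanity check after running the indexer, a minimal sketch (assuming search.db sits in the working directory and was built with the schema above) that lists a few indexed rows:

import sqlite3

# Spot-check the index built by index_for_search.py.
db = sqlite3.connect('search.db')
db.row_factory = sqlite3.Row
rows = db.execute(
    "SELECT relative_path, filename, filetype, "
    "transcript IS NOT NULL AS has_transcript FROM files LIMIT 5"
).fetchall()
for row in rows:
    print(dict(row))
db.close()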
search.py (new file, 58 lines)
@@ -0,0 +1,58 @@
import sqlite3
from flask import Flask, render_template, request, jsonify
app = Flask(__name__)

SEARCH_DB_NAME = 'search.db'

search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row

def searchcommand():
    query = request.form.get("query", "").strip()
    include_transcript = request.form.get("includeTranscript") == "true" or request.form.get("includeTranscript") == "on"
    words = [w for w in query.split() if w]
    cursor = search_db.cursor()

    if not include_transcript:
        # Simple search: all words must be in either relative_path or filename.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = [dict(row) for row in raw_results]
    else:
        # Advanced search: include transcript. Count transcript hits.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ? OR transcript LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()

        results = []
        for row in raw_results:
            result = dict(row)
            transcript = result.get("transcript") or ""
            total_hits = sum(transcript.lower().count(word.lower()) for word in words)
            result["transcript_hits"] = total_hits
            result["transcript"] = None  # Remove full transcript if needed.
            results.append(result)
        # Sort results so files with more transcript hits are on top.
        results.sort(key=lambda x: x["transcript_hits"], reverse=True)

    return jsonify(results=results)

def search():
    return render_template('search.html')
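For reference, a minimal sketch of calling the /searchcommand view registered in app.py. The host and port are placeholders, the `requests` package is an assumed extra dependency, and any secret-based auth the app enforces is ignored here.

import requests  # assumed extra dependency, not part of this commit

resp = requests.post(
    "http://localhost:5000/searchcommand",  # host and port are placeholders
    data={"query": "Predigt 2024", "includeTranscript": "true"},
)
for hit in resp.json()["results"]:
    # transcript_hits is only present when includeTranscript was requested
    print(hit["relative_path"], hit["filename"], hit.get("transcript_hits"))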
@@ -23,7 +23,11 @@ function paintFile() {
    if (currentMusicFile) {
        const currentMusicFileElement = document.querySelector(`.play-file[data-url="${currentMusicFile.path}"]`);
        if (currentMusicFileElement) {
            const fileItem = currentMusicFileElement.closest('.file-item');
            fileItem.classList.add('currently-playing');
            // setTimeout(() => {
            //     fileItem.scrollIntoView({ block: "center", inline: "nearest" });
            // }, 300);
        }
    }
}
@@ -144,6 +148,12 @@ function loadDirectory(subpath) {
        .then(response => response.json())
        .then(data => {
            renderContent(data);
            if (data.playfile) {
                const playFileLink = document.querySelector(`.play-file[data-url="${data.playfile}"]`);
                if (playFileLink) {
                    playFileLink.click();
                }
            }
            paintFile();
            return data; // return data for further chaining
        })
templates/search.html (new file, 104 lines)
@@ -0,0 +1,104 @@
<!doctype html>
<html>
<head>
    <meta charset="utf-8">

    <meta property="og:title" content="Gottesdienste Speyer und Schwegenheim" />
    <meta property="og:description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft." />
    <meta property="og:image" content="https://app.bethaus-speyer.de/static/icons/logo-200x200.png" />
    <meta property="og:url" content="https://app.bethaus-speyer.de" />

    <title>Dateisuche</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
    <meta name="description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft.">
    <meta name="author" content="Bethaus Speyer">
    <link rel="icon" href="/static/icons/logo-192x192.png" type="image/png" sizes="192x192">

    <!-- Bootstrap CSS for modern styling -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="{{ url_for('static', filename='app.css') }}">
    <style>
        body {
            background-color: #f8f9fa;
        }
        .search-container {
            margin-top: 50px;
        }
        .card {
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
    <header class="site-header">
        <a href="#">
            <img src="/static/logoW.png" alt="Logo" class="logo">
        </a>
        <h1>Suche</h1>
    </header>
    <div class="container search-container">
        <form id="searchForm" method="post" class="mb-4">
            <div class="mb-3">
                <label for="query" class="form-label">Suchwörter:</label>
                <input type="text" id="query" name="query" class="form-control" required>
            </div>
            <div class="form-check mb-3">
                <input type="checkbox" class="form-check-input" id="includeTranscript" name="includeTranscript">
                <label class="form-check-label" for="includeTranscript">Im Transkript suchen</label>
            </div>
            <button type="submit" class="btn btn-primary">Suchen</button>
        </form>

        <!-- Container for AJAX-loaded results -->
        <div id="results"></div>
    </div>

    <script>
        document.getElementById('searchForm').addEventListener('submit', function(e) {
            e.preventDefault();
            const query = document.getElementById('query').value.trim();
            const includeTranscript = document.getElementById('includeTranscript').checked;

            // Prepare form data
            const formData = new FormData();
            formData.append('query', query);
            formData.append('includeTranscript', includeTranscript);

            fetch('/searchcommand', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                const resultsDiv = document.getElementById('results');
                resultsDiv.innerHTML = ''; // Clear previous results
                if (data.results && data.results.length > 0) {
                    data.results.forEach(file => {
                        // Create a card element for each result
                        const card = document.createElement('div');
                        card.className = 'card';
                        card.innerHTML = `
                            <div class="card-body">
                                <h5 class="card-title">
                                    <a href="/path/${file.relative_path}" target="_blank">${file.filename}</a>
                                </h5>
                                <h6 class="card-subtitle mb-2 text-muted">${file.relative_path}</h6>
                                ${file.transcript_hits !== undefined ? `<p class="card-text">Treffer im Transkript: ${file.transcript_hits}</p>` : ''}
                            </div>
                        `;
                        resultsDiv.appendChild(card);
                    });
                } else {
                    resultsDiv.innerHTML = '<p>No results found.</p>';
                }
            })
            .catch(error => {
                console.error('Error:', error);
            });
        });
    </script>

    <!-- Bootstrap Bundle with Popper -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>