add search

lelo 2025-04-05 00:52:32 +02:00
parent fd83b89e6d
commit 27f96fff6e
6 changed files with 316 additions and 11 deletions

app.py (24 changed lines)

@@ -16,6 +16,7 @@ from urllib.parse import urlparse, unquote
from werkzeug.middleware.proxy_fix import ProxyFix
import re
import search
import auth
import analytics as a
@@ -37,6 +38,9 @@ app.add_url_rule('/dashboard', view_func=a.dashboard)
app.add_url_rule('/connections', view_func=a.connections)
app.add_url_rule('/mylinks', view_func=auth.mylinks)
app.add_url_rule('/remove_secret', view_func=auth.remove_secret, methods=['POST'])
app.add_url_rule('/search', view_func=search.search, methods=['GET'])
app.add_url_rule('/searchcommand', view_func=search.searchcommand, methods=['POST'])

# Grab the HOST_RULE environment variable
host_rule = os.getenv("HOST_RULE", "")
@@ -181,17 +185,33 @@ def api_browse(subpath):
    base_path = session['folders'][root]
    directory = os.path.join(base_path, *relative_parts)

    playfile = None
    # Check if the constructed directory exists.
    if not os.path.isdir(directory):
        # Assume the last segment is a filename; remove it.
        if relative_parts:
            playfile = relative_parts.pop()  # Get the filename.
            directory = os.path.join(base_path, *relative_parts)
            # Rebuild subpath to reflect the directory (without the file).
            subpath = '/'.join([root] + relative_parts)

    # If the parent directory still doesn't exist, return error.
    if not os.path.isdir(directory):
        return jsonify({'error': 'Directory not found'}), 404

    directories, files = list_directory_contents(directory, subpath)
    breadcrumbs = generate_breadcrumbs(subpath)

    response = {
        'breadcrumbs': breadcrumbs,
        'directories': directories,
        'files': files
    }
    # If a filename was selected include it.
    if playfile:
        response['playfile'] = os.path.join(subpath, playfile).replace(os.sep, '/')
    return jsonify(response)


@app.route("/media/<path:subpath>")
@auth.require_secret
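For reference, the JSON shape returned by the reworked api_browse can be sketched as below. Only the key names come from the code above; the per-entry formats are produced by generate_breadcrumbs and list_directory_contents, which are outside this diff, so the values here are placeholders.

# Rough sketch of an /api/browse response; 'playfile' only appears when the requested
# subpath named a file instead of a directory (the example path is invented).
{
    "breadcrumbs": [...],                  # built by generate_breadcrumbs(subpath)
    "directories": [...],                  # built by list_directory_contents(directory, subpath)
    "files": [...],
    "playfile": "Musik/Album/track01.mp3"  # forward-slash path, as normalized above
}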

index_for_search.py (new file, 113 lines)

@@ -0,0 +1,113 @@
import os
import json
import sqlite3

SEARCH_DB_NAME = 'search.db'
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row


def init_db():
    """Initializes the database with the required schema."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()


def scan_dir(directory):
    """Recursively scan directories using os.scandir for improved performance."""
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Skip transcription directories immediately.
                    if entry.name.lower() == "transkription":
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        return


def updatefileindex():
    cursor = search_db.cursor()
    # Load folder configuration from JSON file.
    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)
    # Process each configured base folder.
    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            norm_folderpath = os.path.normpath(raw_folderpath)
            # Precompute the length of the base folder path (plus one for the separator).
            base_len = len(norm_folderpath) + 1
            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
            current_keys = set()
            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get relative part by slicing if possible.
                if entry_path.startswith(norm_folderpath):
                    rel_part = entry_path[base_len:]
                else:
                    rel_part = os.path.relpath(entry_path, norm_folderpath)
                # Prepend the foldername so it becomes part of the stored relative path.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                print(relative_path)
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = None
                # Check for a corresponding transcript file in a sibling "Transkription" folder.
                parent_dir = os.path.dirname(entry_path)
                transcript_dir = os.path.join(parent_dir, "Transkription")
                transcript_filename = os.path.splitext(entry.name)[0] + ".md"
                transcript_path = os.path.join(transcript_dir, transcript_filename)
                if os.path.exists(transcript_path):
                    try:
                        with open(transcript_path, 'r', encoding='utf-8') as tf:
                            transcript = tf.read()
                    except Exception:
                        transcript = None
                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))
            # Remove database entries for files under this base folder that are no longer on disk.
            # relative_path is stored with forward slashes, so build the LIKE pattern with '/' rather than os.sep.
            pattern = foldername + '/%'
cursor.execute("SELECT id, relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
db_rows = cursor.fetchall()
keys_in_db = set((row["relative_path"], row["filename"]) for row in db_rows)
keys_to_delete = keys_in_db - current_keys
for key in keys_to_delete:
cursor.execute("DELETE FROM files WHERE relative_path = ? AND filename = ?", key)
# Bulk write the scanned files using INSERT OR REPLACE.
cursor.executemany(
"INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
scanned_files
)
# Commit changes after processing this base folder.
search_db.commit()
return "File index updated successfully"
if __name__ == "__main__":
init_db() # Initialize the database schema if it doesn't exist
updatefileindex() # Update the file index
search_db.close() # Close the database connection
print("Database connection closed.")

search.db (new binary file, not shown)

search.py (new file, 58 lines)

@@ -0,0 +1,58 @@
import sqlite3
from flask import Flask, render_template, request, jsonify

app = Flask(__name__)

SEARCH_DB_NAME = 'search.db'
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row
def searchcommand():
    query = request.form.get("query", "").strip()
    include_transcript = request.form.get("includeTranscript") == "true" or request.form.get("includeTranscript") == "on"
    words = [w for w in query.split() if w]
    cursor = search_db.cursor()

    if not include_transcript:
        # Simple search: all words must be in either relative_path or filename.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = [dict(row) for row in raw_results]
    else:
        # Advanced search: include transcript. Count transcript hits.
        conditions = []
        params = []
        for word in words:
            conditions.append("(relative_path LIKE ? OR filename LIKE ? OR transcript LIKE ?)")
            params.extend([f"%{word}%", f"%{word}%", f"%{word}%"])
        sql = "SELECT * FROM files"
        if conditions:
            sql += " WHERE " + " AND ".join(conditions)
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
        results = []
        for row in raw_results:
            result = dict(row)
            transcript = result.get("transcript") or ""
            total_hits = sum(transcript.lower().count(word.lower()) for word in words)
            result["transcript_hits"] = total_hits
            result["transcript"] = None  # Remove full transcript if needed.
            results.append(result)
        # Sort results so files with more transcript hits are on top.
        results.sort(key=lambda x: x["transcript_hits"], reverse=True)
    return jsonify(results=results)


def search():
    return render_template('search.html')
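To try the endpoint by hand, a small client along these lines should work; it assumes the app is reachable at http://localhost:5000 and that /searchcommand requires no extra auth secret, and the search words are made up for illustration.

# Minimal client sketch for POSTing a search to /searchcommand (URL and query are assumptions).
import json
import urllib.parse
import urllib.request

form = urllib.parse.urlencode({
    "query": "predigt 2024",       # hypothetical search words
    "includeTranscript": "true",   # matches the string check in searchcommand()
}).encode("utf-8")

with urllib.request.urlopen("http://localhost:5000/searchcommand", data=form) as resp:
    results = json.load(resp)["results"]

for hit in results:
    print(hit["relative_path"], hit["filename"], hit.get("transcript_hits"))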

(JavaScript file, filename not shown in this view)

@@ -23,11 +23,15 @@ function paintFile() {
    if (currentMusicFile) {
        const currentMusicFileElement = document.querySelector(`.play-file[data-url="${currentMusicFile.path}"]`);
        if (currentMusicFileElement) {
            const fileItem = currentMusicFileElement.closest('.file-item');
            fileItem.classList.add('currently-playing');
            // setTimeout(() => {
            //     fileItem.scrollIntoView({ block: "center", inline: "nearest" });
            // }, 300);
        }
    }
}
}

function renderContent(data) {
@@ -144,6 +148,12 @@ function loadDirectory(subpath) {
        .then(response => response.json())
        .then(data => {
            renderContent(data);
            if (data.playfile) {
                const playFileLink = document.querySelector(`.play-file[data-url="${data.playfile}"]`);
                if (playFileLink) {
                    playFileLink.click();
                }
            }
            paintFile();
            return data; // return data for further chaining
        })

templates/search.html (new file, 104 lines)

@@ -0,0 +1,104 @@
<!doctype html>
<html>
<head>
    <meta charset="utf-8">
    <meta property="og:title" content="Gottesdienste Speyer und Schwegenheim" />
    <meta property="og:description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft." />
    <meta property="og:image" content="https://app.bethaus-speyer.de/static/icons/logo-200x200.png" />
    <meta property="og:url" content="https://app.bethaus-speyer.de" />
    <title>Dateisuche</title>
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
    <meta name="description" content="... uns aber, die wir gerettet werden, ist es eine Gotteskraft.">
    <meta name="author" content="Bethaus Speyer">
    <link rel="icon" href="/static/icons/logo-192x192.png" type="image/png" sizes="192x192">
    <!-- Bootstrap CSS for modern styling -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="{{ url_for('static', filename='app.css') }}">
    <style>
        body {
            background-color: #f8f9fa;
        }
        .search-container {
            margin-top: 50px;
        }
        .card {
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
    <header class="site-header">
        <a href="#">
            <img src="/static/logoW.png" alt="Logo" class="logo">
        </a>
        <h1>Suche</h1>
    </header>
    <div class="container search-container">
        <form id="searchForm" method="post" class="mb-4">
            <div class="mb-3">
                <label for="query" class="form-label">Suchwörter:</label>
                <input type="text" id="query" name="query" class="form-control" required>
            </div>
            <div class="form-check mb-3">
                <input type="checkbox" class="form-check-input" id="includeTranscript" name="includeTranscript">
                <label class="form-check-label" for="includeTranscript">Im Transkript suchen</label>
            </div>
            <button type="submit" class="btn btn-primary">Suchen</button>
        </form>
        <!-- Container for AJAX-loaded results -->
        <div id="results"></div>
    </div>
    <script>
        document.getElementById('searchForm').addEventListener('submit', function(e) {
            e.preventDefault();
            const query = document.getElementById('query').value.trim();
            const includeTranscript = document.getElementById('includeTranscript').checked;
            // Prepare form data
            const formData = new FormData();
            formData.append('query', query);
            formData.append('includeTranscript', includeTranscript);
            fetch('/searchcommand', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                const resultsDiv = document.getElementById('results');
                resultsDiv.innerHTML = ''; // Clear previous results
                if (data.results && data.results.length > 0) {
                    data.results.forEach(file => {
                        // Create a card element for each result
                        const card = document.createElement('div');
                        card.className = 'card';
                        card.innerHTML = `
                            <div class="card-body">
                                <h5 class="card-title">
                                    <a href="/path/${file.relative_path}" target="_blank">${file.filename}</a>
                                </h5>
                                <h6 class="card-subtitle mb-2 text-muted">${file.relative_path}</h6>
                                ${file.transcript_hits !== undefined ? `<p class="card-text">Treffer im Transkript: ${file.transcript_hits}</p>` : ''}
                            </div>
                        `;
                        resultsDiv.appendChild(card);
                    });
                } else {
                    resultsDiv.innerHTML = '<p>No results found.</p>';
                }
            })
            .catch(error => {
                console.error('Error:', error);
            });
        });
    </script>
    <!-- Bootstrap Bundle with Popper -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>