bethaus-app/search.py
2026-01-23 15:15:42 +00:00

140 lines
4.8 KiB
Python

import sqlite3
from flask import Flask, render_template, request, request, jsonify, session
import random
import json
from datetime import datetime
# Flask application object for this module.
app = Flask(__name__)

# Single module-wide SQLite connection to the search index.
# check_same_thread=False lets threads other than the creating one use it;
# rows come back as sqlite3.Row so they can be converted with dict(row).
search_db = sqlite3.connect('search.db', check_same_thread=False)
search_db.row_factory = sqlite3.Row

# Load the application configuration once at import time. JSON is UTF-8 by
# specification, so do not rely on the platform's default encoding.
with open("app_config.json", 'r', encoding="utf-8") as file:
    app_config = json.load(file)
# Filetype group name -> file extensions (with leading dot) in that group.
FILETYPE_GROUPS = dict(
    audio=('.mp3', '.wav', '.ogg', '.m4a', '.flac'),
    video=('.mp4', '.mov', '.mkv', '.avi', '.webm'),
    image=('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff'),
)

# Every extension that belongs to any group above, de-duplicated and sorted.
# Anything not in this tuple is treated as the "other" filetype.
ALL_GROUP_EXTS = tuple(sorted(set().union(*FILETYPE_GROUPS.values())))
# SQL fragment for a literal substring match; backslash is the escape char.
_LIKE_LITERAL = "LIKE ? ESCAPE '\\'"


def _like_pattern(term):
    """Return a ``%term%`` LIKE pattern with wildcards in *term* escaped."""
    escaped = (
        term.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    return f"%{escaped}%"


def _normalize_iso_date(value):
    """Return *value* as zero-padded ``YYYY-MM-DD``, or None if it is invalid."""
    try:
        return datetime.strptime(value, "%Y-%m-%d").date().isoformat()
    except ValueError:
        return None


def _filetype_condition(filetypes):
    """Build the filetype restriction for the selected groups.

    Returns a ``(sql, params)`` pair, or None when no restriction applies
    (every group plus 'other' is selected, or nothing recognised is).
    """
    if set(filetypes) == set(FILETYPE_GROUPS) | {'other'}:
        return None

    clauses = []
    params = []

    # Sorted so the generated SQL and parameter order are deterministic.
    selected_exts = sorted({
        ext
        for group in filetypes if group in FILETYPE_GROUPS
        for ext in FILETYPE_GROUPS[group]
    })
    if selected_exts:
        placeholders = ",".join("?" for _ in selected_exts)
        clauses.append(f"filetype IN ({placeholders})")
        params.extend(selected_exts)

    if 'other' in filetypes:
        # "other" = no filetype at all, or one outside every known group.
        placeholders = ",".join("?" for _ in ALL_GROUP_EXTS)
        clauses.append(
            f"(filetype IS NULL OR filetype = '' OR filetype NOT IN ({placeholders}))"
        )
        params.extend(ALL_GROUP_EXTS)

    if not clauses:
        return None
    return "(" + " OR ".join(clauses) + ")", params


def searchcommand():
    """Search the ``files`` table using the submitted form and return JSON.

    Form fields read from ``request.form``:
        query: whitespace-separated words; every word must match one of the
            searched fields.
        category: substring that must occur in the filename.
        folder: restrict to this basefolder if the session allows it.
        datefrom / dateto: inclusive ``YYYY-MM-DD`` bounds on
            ``performance_date``; values that do not parse are ignored.
        filetype (multi): any of the FILETYPE_GROUPS names or 'other';
            defaults to 'audio' when nothing is selected.
        includeTranscript: 'true' or 'on' to search transcripts as well.

    Only basefolders listed in ``session['folders']`` are searched. If the
    session grants no folders, an empty result list is returned instead of
    searching without a folder restriction.

    Returns:
        A Flask JSON response ``{"results": [...]}`` with at most 100 records.
    """
    form = request.form
    query = form.get("query", "").strip()
    category = form.get("category", "").strip()
    searchfolder = form.get("folder", "").strip()
    datefrom = form.get("datefrom", "").strip()
    dateto = form.get("dateto", "").strip()
    filetypes = [ft.strip().lower() for ft in form.getlist("filetype") if ft.strip()]
    if not filetypes:
        # Default to audio when nothing selected
        filetypes = ['audio']
    include_transcript = form.get("includeTranscript") in ["true", "on"]
    words = query.split()

    # Determine allowed basefolders. Without any, there is nothing the user
    # may see; skipping the filter here would expose every row.
    allowed_basefolders = list((session.get('folders') or {}).keys())
    if not allowed_basefolders:
        return jsonify(results=[])
    if searchfolder and searchfolder in allowed_basefolders:
        allowed_basefolders = [searchfolder]

    # Conditions and params are appended in lock-step so that placeholders
    # and values stay aligned.
    conditions = []
    params = []

    # Choose fields for word search
    if include_transcript:
        fields = ['filename', 'transcript']
    else:
        fields = ['relative_path', 'filename']
    word_clause = "(" + " OR ".join(f"{field} {_LIKE_LITERAL}" for field in fields) + ")"
    for word in words:
        conditions.append(word_clause)
        params.extend([_like_pattern(word)] * len(fields))

    # Category filter
    if category:
        conditions.append(f"filename {_LIKE_LITERAL}")
        params.append(_like_pattern(category))

    # Basefolder filter
    placeholders = ",".join("?" for _ in allowed_basefolders)
    conditions.append(f"basefolder IN ({placeholders})")
    params.extend(allowed_basefolders)

    # Date range filters; unparseable dates are dropped rather than compared
    # as arbitrary strings.
    date_filter_applied = False
    for raw_date, operator in ((datefrom, ">="), (dateto, "<=")):
        if not raw_date:
            continue
        normalized = _normalize_iso_date(raw_date)
        if normalized is None:
            continue
        conditions.append(f"performance_date {operator} ?")
        params.append(normalized)
        date_filter_applied = True
    # Ensure we only include entries with dates when filtering by date
    if date_filter_applied:
        conditions.append("performance_date IS NOT NULL")

    # Filetype filters (multiple selection)
    filetype_filter = _filetype_condition(filetypes)
    if filetype_filter is not None:
        filetype_sql, filetype_params = filetype_filter
        conditions.append(filetype_sql)
        params.extend(filetype_params)

    # Build and execute SQL
    sql = "SELECT * FROM files"
    if conditions:
        sql += " WHERE " + " AND ".join(conditions)
    cursor = search_db.cursor()
    try:
        cursor.execute(sql, params)
        raw_results = cursor.fetchall()
    finally:
        cursor.close()

    # Process results
    lowered_words = [w.lower() for w in words]
    results = []
    for row in raw_results:
        record = dict(row)
        if include_transcript:
            transcript = (record.get('transcript', '') or '').lower()
            record['transcript_hits'] = sum(transcript.count(w) for w in lowered_words)
            # The transcript text itself is not sent back, only the hit count.
            record.pop('transcript', None)
        # Convert the date to DD.MM.YYYY for display
        if record.get('performance_date'):
            try:
                performance_date = datetime.strptime(record['performance_date'], "%Y-%m-%d")
                record['performance_date'] = performance_date.strftime("%d.%m.%Y")
            except (ValueError, TypeError):
                record['performance_date'] = None
        record['query'] = query
        results.append(record)

    # Shuffle first so that records with equal scores come out in random
    # order; the sort below is stable and preserves that order among ties.
    random.shuffle(results)
    # 'hitcount' is presumably a column of the files table; verify against schema.
    key = 'transcript_hits' if include_transcript else 'hitcount'
    # "or 0" also covers a NULL value, which would otherwise break the sort.
    results.sort(key=lambda rec: rec.get(key) or 0, reverse=True)

    # Limit results
    results = results[:100]
    return jsonify(results=results)