161 lines
5.9 KiB
Python
161 lines
5.9 KiB
Python
import os
|
|
import sqlite3
|
|
from flask import Flask, render_template, request, request, jsonify, session
|
|
import random
|
|
import json
|
|
from datetime import datetime
|
|
from urllib.parse import quote
|
|
|
|
# Flask application object for this module.
app = Flask(__name__)

# Module-level SQLite connection to search.db. check_same_thread=False
# disables sqlite3's check that the connection is only used from the
# thread that created it.
search_db = sqlite3.connect('search.db', check_same_thread=False)
# sqlite3.Row lets result rows be addressed by column name and converted
# with dict(row).
search_db.row_factory = sqlite3.Row

# JSON text is UTF-8; pass the encoding explicitly so parsing does not
# depend on the platform's locale default.
with open("app_config.json", 'r', encoding="utf-8") as file:
    app_config = json.load(file)
|
|
|
|
# Maps a file-type group key to the lower-case file extensions
# (including the leading dot) that belong to that group.
FILETYPE_GROUPS = {
    'audio': ('.mp3', '.wav', '.ogg', '.m4a', '.flac'),
    'video': ('.mp4', '.mov', '.mkv', '.avi', '.webm'),
    'image': ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff'),
    'liedtext': ('.sng',),
}

# Every extension that belongs to any group, de-duplicated and sorted.
ALL_GROUP_EXTS = tuple(sorted({ext for group in FILETYPE_GROUPS.values() for ext in group}))

# The set of group keys defined above.
ALL_GROUP_KEYS = set(FILETYPE_GROUPS.keys())
|
|
|
|
def _normalize_iso_date(value):
    """Return *value* as a zero-padded ``YYYY-MM-DD`` string, or ``None`` if it is not a valid date."""
    try:
        return datetime.strptime(value, "%Y-%m-%d").date().isoformat()
    except ValueError:
        return None


def _fulltext_link(record):
    """Return ``(fulltext_type, fulltext_url)`` for one result record."""
    # Columns may be NULL in the database; treat NULL like an empty string.
    relative_path = record.get('relative_path') or ''
    filetype = (record.get('filetype') or '').lower()

    if filetype == '.sng':
        return 'sng', f"/media/{quote(relative_path, safe='/')}"

    # Any other file type links to "<parent>/Transkription/<name>.md",
    # where <name> is the file name without its extension.
    filename = record.get('filename') or ''
    name_root = os.path.splitext(filename or os.path.basename(relative_path))[0]
    parent = os.path.dirname(relative_path)
    if parent:
        transcript_rel_path = f"{parent}/Transkription/{name_root}.md"
    else:
        transcript_rel_path = f"Transkription/{name_root}.md"
    return 'transcript', f"/transcript/{quote(transcript_rel_path, safe='/')}"


def searchcommand():
    """Search the ``files`` table with the submitted form and return JSON.

    Form fields read from ``request.form``:
        query: whitespace-separated words. Each word must match (as a
            ``LIKE`` substring) at least one of the searched columns.
        category: optional substring that must occur in ``filename``.
        folder: optional basefolder; honoured only when it is one of the
            keys of ``session['folders']``.
        datefrom, dateto: optional inclusive ``YYYY-MM-DD`` bounds on
            ``performance_date``. Values that are not valid dates are
            ignored.
        filetype: may be given several times; keys of ``FILETYPE_GROUPS``
            and/or ``'other'``. Defaults to ``['audio']`` when absent.
        includeTranscript: ``"true"`` or ``"on"`` searches ``filename`` and
            ``transcript``; otherwise ``relative_path`` and ``filename``.

    Returns:
        ``jsonify(results=..., total=...)`` where ``results`` holds at most
        20 records and ``total`` is the number of rows that matched. Rows
        are ordered by hit count, descending, with ties in random order.
    """
    form = request.form
    query = form.get("query", "").strip()
    category = form.get("category", "").strip()
    searchfolder = form.get("folder", "").strip()
    datefrom = form.get("datefrom", "").strip()
    dateto = form.get("dateto", "").strip()
    filetypes = [ft.strip().lower() for ft in form.getlist("filetype") if ft.strip()]
    if not filetypes:
        # Default to audio when nothing selected
        filetypes = ['audio']

    include_transcript = form.get("includeTranscript") in ("true", "on")
    words = query.split()

    # Determine allowed basefolders. An empty (or missing) allow-list must
    # not fall through to an unfiltered query, which would expose files
    # from every folder; answer with an empty result instead.
    allowed_basefolders = list((session.get('folders') or {}).keys())
    if not allowed_basefolders:
        return jsonify(results=[], total=0)
    if searchfolder and searchfolder in allowed_basefolders:
        allowed_basefolders = [searchfolder]

    # Build conditions and parameters (kept in lock-step order).
    conditions = []
    params = []

    # Word search: every word has to match at least one of the fields.
    if include_transcript:
        fields = ('filename', 'transcript')
    else:
        fields = ('relative_path', 'filename')
    word_clause = "(" + " OR ".join(f"{f} LIKE ?" for f in fields) + ")"
    for word in words:
        conditions.append(word_clause)
        params.extend([f"%{word}%"] * len(fields))

    # Category filter
    if category:
        conditions.append("filename LIKE ?")
        params.append(f"%{category}%")

    # Basefolder filter
    placeholders = ",".join("?" for _ in allowed_basefolders)
    conditions.append(f"basefolder IN ({placeholders})")
    params.extend(allowed_basefolders)

    # Date range filters. The bounds are compared as strings, so they are
    # validated and normalised to zero-padded ISO form first.
    date_filter_applied = False
    for raw_date, operator in ((datefrom, ">="), (dateto, "<=")):
        if not raw_date:
            continue
        iso_date = _normalize_iso_date(raw_date)
        if iso_date is None:
            continue  # not a valid date: ignore this bound
        conditions.append(f"performance_date {operator} ?")
        params.append(iso_date)
        date_filter_applied = True
    # Ensure we only include entries with dates when filtering by date
    if date_filter_applied:
        conditions.append("performance_date IS NOT NULL")

    # Filetype filters (multiple selection)
    selected_groups = [ft for ft in filetypes if ft in FILETYPE_GROUPS]
    include_other = 'other' in filetypes

    # If not all groups selected, apply filter
    if set(filetypes) != (ALL_GROUP_KEYS | {'other'}):
        clauses = []
        if selected_groups:
            ext_list = sorted({ext for g in selected_groups for ext in FILETYPE_GROUPS[g]})
            placeholders = ",".join("?" for _ in ext_list)
            clauses.append(f"filetype IN ({placeholders})")
            params.extend(ext_list)
        if include_other:
            # "other" = no file type at all, or one outside every known group.
            placeholders = ",".join("?" for _ in ALL_GROUP_EXTS)
            clauses.append(f"(filetype IS NULL OR filetype = '' OR filetype NOT IN ({placeholders}))")
            params.extend(ALL_GROUP_EXTS)
        if clauses:
            conditions.append("(" + " OR ".join(clauses) + ")")

    # Build and execute SQL
    sql = "SELECT * FROM files"
    if conditions:
        sql += " WHERE " + " AND ".join(conditions)
    cursor = search_db.cursor()
    cursor.execute(sql, params)
    raw_results = cursor.fetchall()
    total_results = len(raw_results)

    # Process results
    lowered_words = [w.lower() for w in words]
    results = []
    for row in raw_results:
        record = dict(row)
        if include_transcript:
            # Case-insensitive count of every search word in the transcript;
            # the transcript text itself is not sent back.
            transcript = (record.get('transcript') or '').lower()
            record['transcript_hits'] = sum(transcript.count(w) for w in lowered_words)
            record.pop('transcript', None)

        record['fulltext_type'], record['fulltext_url'] = _fulltext_link(record)

        # convert date to DD.MM.YYYY format
        if record.get('performance_date'):
            try:
                performance_date = datetime.strptime(record['performance_date'], "%Y-%m-%d")
                record['performance_date'] = performance_date.strftime("%d.%m.%Y")
            except (ValueError, TypeError):
                record['performance_date'] = None
        record['query'] = query
        results.append(record)

    # Randomize and sort: the sort is stable, so the shuffle decides the
    # order among records with equal hit counts.
    random.shuffle(results)
    key = 'transcript_hits' if include_transcript else 'hitcount'
    # "or 0" also covers a NULL hit count, which cannot be compared to ints.
    results.sort(key=lambda x: x.get(key) or 0, reverse=True)

    # Limit results
    results = results[:20]
    return jsonify(results=results, total=total_results)
|