diff --git a/index_for_search.py b/index_for_search.py
index e1b0679..faaf9de 100755
--- a/index_for_search.py
+++ b/index_for_search.py
@@ -39,41 +39,12 @@ def init_db():
     ''')
     search_db.commit()
     # If the table already existed, try to add the new columns.
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN hitcount INTEGER DEFAULT 0")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN basefolder TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN titel TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN performance_date TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
-    try:
-        cursor.execute("ALTER TABLE files ADD COLUMN site TEXT")
-    except sqlite3.OperationalError:
-        # Likely the column already exists, so we ignore this error.
-        pass
+    # try:
+    #     cursor.execute("ALTER TABLE files ADD COLUMN category TEXT")
+    # except sqlite3.OperationalError:
+    #     # Likely the column already exists, so we ignore this error.
+    #     pass
+    search_db.commit()


 def scan_dir(directory):
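Note on the hunk above: the repetitive per-column ALTER TABLE/except blocks are dropped and only a commented-out example remains, so new columns are expected to come from the CREATE TABLE statement. If existing on-disk databases still need columns added in place, one compact, idempotent alternative is sketched below. This is illustrative only and not part of the patch: the helper name `ensure_columns` is made up here, while the column names and types are taken from the removed statements.

```python
import sqlite3

def ensure_columns(conn: sqlite3.Connection) -> None:
    """Add any missing columns to the 'files' table (idempotent sketch)."""
    # Column definitions mirror the ALTER TABLE statements removed above.
    wanted = {
        "hitcount": "INTEGER DEFAULT 0",
        "basefolder": "TEXT",
        "category": "TEXT",
        "titel": "TEXT",
        "name": "TEXT",
        "performance_date": "TEXT",
        "site": "TEXT",
    }
    cursor = conn.cursor()
    # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk) per column;
    # this assumes the 'files' table already exists (as created by init_db).
    existing = {row[1] for row in cursor.execute("PRAGMA table_info(files)")}
    for column, definition in wanted.items():
        if column not in existing:
            cursor.execute(f"ALTER TABLE files ADD COLUMN {column} {definition}")
    conn.commit()
```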
@@ -200,16 +171,15 @@ def updatefileindex():
         date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
         if date_match:
             date_str = date_match.group(1)
-            # Convert to YYYY-MM-DD format
-            try:
-                date_obj = datetime.strptime(date_str, '%d.%m.%Y')
-                performance_date = date_obj.strftime('%d.%m.%Y')
-            except ValueError:
+            performance_date = None
+            for fmt in ('%d.%m.%Y', '%d.%m.%y', '%Y-%m-%d'):
                 try:
-                    date_obj = datetime.strptime(date_str, '%d.%m.%y')
-                    performance_date = date_obj.strftime('%d.%m.%Y')
+                    date_obj = datetime.strptime(date_str, fmt)
+                    # Convert to ISO format YYYY-MM-DD
+                    performance_date = date_obj.strftime('%Y-%m-%d')
+                    break
                 except ValueError:
-                    performance_date = None
+                    continue
         else:
             performance_date = None
@@ -236,7 +206,52 @@ def updatefileindex():
     return "File index updated successfully"


+def convert_dates(search_db,
+                  date_formats=('%d.%m.%Y', '%d.%m.%y')):
+    """
+    Given an open SQLite connection (search_db), go through every row in the
+    'files' table:
+    - Read the date from performance_date (expects 'dd.mm.yyyy' or 'dd.mm.yy').
+    - Parse it and reformat it to ISO 'YYYY-MM-DD'.
+    - Update the row (using id as the primary key).
+
+    Only rows whose value actually changes are counted. The connection is
+    left open; the caller is responsible for closing it.
+    """
+    # Regex to quickly filter out non-matching strings
+    date_regex = re.compile(r'^\d{1,2}\.\d{1,2}\.\d{2,4}$')
+
+    cur = search_db.cursor()
+
+    # Fetch every row; rows without a usable date are skipped below
+    cur.execute("SELECT id, performance_date FROM files")
+    rows = cur.fetchall()
+
+    converted_count = 0
+
+    for pk, raw_date in rows:
+        if not raw_date or not date_regex.match(raw_date):
+            continue
+
+        for fmt in date_formats:
+            try:
+                dt = datetime.strptime(raw_date, fmt)
+                new_date = dt.strftime('%Y-%m-%d')
+                # Only update if the reformatted date is different
+                if new_date != raw_date:
+                    cur.execute(
+                        "UPDATE files SET performance_date = ? WHERE id = ?",
+                        (new_date, pk)
+                    )
+                    converted_count += 1
+                break  # stop trying other formats
+            except ValueError:
+                continue
+
+    search_db.commit()
+    print(f"Converted {converted_count} rows to ISO format.")
+
+
 if __name__ == "__main__":
     init_db()  # Initialize the database schema if it doesn't exist
+    convert_dates(search_db)  # Convert legacy dd.mm.yyyy dates to ISO format
     updatefileindex()  # Update the file index
     search_db.close()  # Close the search database connection
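The conversion that `convert_dates` performs can be exercised against a throwaway in-memory database. The sketch below is not part of the patch: it uses a minimal `files` table with only `id` and `performance_date`, inlines the same parse-and-rewrite loop, and omits the regex pre-filter (failed `strptime` calls are enough to skip non-dates here).

```python
import sqlite3
from datetime import datetime

# Throwaway database with a minimal 'files' table (the real table has more columns).
db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE files (id INTEGER PRIMARY KEY, performance_date TEXT)")
db.executemany(
    "INSERT INTO files (performance_date) VALUES (?)",
    [("03.10.2021",), ("3.10.21",), ("2021-10-03",), (None,), ("not a date",)],
)

# Same parse-and-rewrite loop as convert_dates, inlined for the sketch.
converted = 0
for pk, raw in db.execute("SELECT id, performance_date FROM files").fetchall():
    if not raw:
        continue
    for fmt in ("%d.%m.%Y", "%d.%m.%y"):
        try:
            new = datetime.strptime(raw, fmt).strftime("%Y-%m-%d")
            if new != raw:
                db.execute("UPDATE files SET performance_date = ? WHERE id = ?", (new, pk))
                converted += 1
            break
        except ValueError:
            continue
db.commit()

for row in db.execute("SELECT id, performance_date FROM files"):
    print(row)
# Rows 1 and 2 now read '2021-10-03'; the ISO, NULL and non-date rows are untouched.
```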
diff --git a/search.py b/search.py
index 3a9c4b6..42c7cd0 100644
--- a/search.py
+++ b/search.py
@@ -17,87 +17,91 @@ def searchcommand():
     query = request.form.get("query", "").strip()
     category = request.form.get("category", "").strip()
     searchfolder = request.form.get("folder", "").strip()
+    datefrom = request.form.get("datefrom", "").strip()
+    dateto = request.form.get("dateto", "").strip()
     include_transcript = request.form.get("includeTranscript") in ["true", "on"]
     words = [w for w in query.split() if w]
     cursor = search_db.cursor()
-
-    allowed_basefolders = list(session['folders'].keys())
-    # if the search folder is allowed to be searched, select it
-    # if not just allowed_basefolders rules apply
-    if searchfolder != "" and searchfolder in allowed_basefolders:
+    # Determine allowed basefolders
+    allowed_basefolders = list(session['folders'].keys())
+    if searchfolder and searchfolder in allowed_basefolders:
         allowed_basefolders = [searchfolder]

-    if not include_transcript:
-        conditions = []
-        params = []
-        # Apply query words to relative_path and filename
-        for word in words:
-            conditions.append("(relative_path LIKE ? OR filename LIKE ?)")
-            params.extend([f"%{word}%", f"%{word}%"])
-        # Search category in filename
-        if category:
-            conditions.append("(filename LIKE ?)")
-            params.extend([f"%{category}%"])
-        # Only include rows where basefolder is in allowed_basefolders
-        if allowed_basefolders:
-            placeholders = ",".join("?" for _ in allowed_basefolders)
-            conditions.append(f"basefolder IN ({placeholders})")
-            params.extend(allowed_basefolders)
-
-        sql = "SELECT * FROM files"
-        if conditions:
-            sql += " WHERE " + " AND ".join(conditions)
-
-        cursor.execute(sql, params)
-        raw_results = cursor.fetchall()
-        results = [dict(row) for row in raw_results]
-
-        # Randomize the list before sorting to break ties randomly.
-        random.shuffle(results)
-        results.sort(key=lambda x: x["hitcount"], reverse=True)
+    # Build conditions and parameters
+    conditions = []
+    params = []
+    # Choose fields for word search
+    if include_transcript:
+        fields = ['filename', 'transcript']
     else:
-        # Advanced search: include transcript. Count transcript hits.
-        conditions = []
-        params = []
-        # Apply query words for filename and transcript
-        for word in words:
-            conditions.append("(filename LIKE ? OR transcript LIKE ?)")
-            params.extend([f"%{word}%", f"%{word}%"])
-        # Search category in filename
-        if category:
-            conditions.append("(filename LIKE ?)")
-            params.extend([f"%{category}%"])
-        # Only include rows where basefolder is in allowed_basefolders
-        if allowed_basefolders:
-            placeholders = ",".join("?" for _ in allowed_basefolders)
-            conditions.append(f"basefolder IN ({placeholders})")
-            params.extend(allowed_basefolders)
+        fields = ['relative_path', 'filename']

-        sql = "SELECT * FROM files"
-        if conditions:
-            sql += " WHERE " + " AND ".join(conditions)
-        cursor.execute(sql, params)
-        raw_results = cursor.fetchall()
+    for word in words:
+        field_clauses = [f"{f} LIKE ?" for f in fields]
+        conditions.append(f"({ ' OR '.join(field_clauses) })")
+        for _ in fields:
+            params.append(f"%{word}%")

-        results = []
-        for row in raw_results:
-            result = dict(row)
-            transcript = result.get("transcript") or ""
-            total_hits = sum(transcript.lower().count(word.lower()) for word in words)
-            result["transcript_hits"] = total_hits
-            result.pop("transcript", None)
-            results.append(result)
-
-        # Randomize the list before sorting to break ties randomly.
-        random.shuffle(results)
-        results.sort(key=lambda x: x["transcript_hits"], reverse=True)
+    # Category filter
+    if category:
+        conditions.append("filename LIKE ?")
+        params.append(f"%{category}%")

+    # Basefolder filter
+    if allowed_basefolders:
+        placeholders = ",".join("?" for _ in allowed_basefolders)
+        conditions.append(f"basefolder IN ({placeholders})")
+        params.extend(allowed_basefolders)
+
+    # Date range filters (dates are compared as ISO 'YYYY-MM-DD' strings)
+    if datefrom:
+        conditions.append("performance_date >= ?")
+        params.append(datefrom)
+    if dateto:
+        conditions.append("performance_date <= ?")
+        params.append(dateto)
+    # Ensure we only include entries with dates when filtering by date
+    if datefrom or dateto:
+        conditions.append("performance_date IS NOT NULL")
+
+    # Build and execute SQL
+    sql = "SELECT * FROM files"
+    if conditions:
+        sql += " WHERE " + " AND ".join(conditions)
+    cursor.execute(sql, params)
+    raw_results = cursor.fetchall()
+
+    # Process results
+    results = []
+    for row in raw_results:
+        record = dict(row)
+        if include_transcript:
+            transcript = record.get('transcript', '') or ''
+            record['transcript_hits'] = sum(
+                transcript.lower().count(w.lower()) for w in words
+            )
+            record.pop('transcript', None)
+        results.append(record)
+
+    # Randomize and sort
+    random.shuffle(results)
+    key = 'transcript_hits' if include_transcript else 'hitcount'
+    results.sort(key=lambda x: x.get(key, 0), reverse=True)
+
+    # Limit results
     results = results[:100]

     return jsonify(results=results)

+
 def search():
     allowed_basefolders = list(session['folders'].keys())
     title_short = app_config.get('TITLE_SHORT', 'Default Title')
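The new `datefrom`/`dateto` filters compare `performance_date` as plain TEXT, so they only behave chronologically because `updatefileindex()` and `convert_dates()` now store zero-padded ISO 'YYYY-MM-DD' values, for which lexicographic order equals date order. The standalone sqlite3 sketch below is not part of the patch; it only illustrates the difference against the legacy dd.mm.yyyy format.

```python
import sqlite3

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE files (performance_date TEXT)")

# Legacy dd.mm.yyyy values: lexicographic order does not match chronological order.
db.executemany("INSERT INTO files VALUES (?)", [("10.12.2023",), ("09.01.2024",)])
print(db.execute("SELECT performance_date FROM files "
                 "WHERE performance_date >= '09.01.2024'").fetchall())
# -> [('10.12.2023',), ('09.01.2024',)]  December 2023 slips past a January 2024 lower bound.

# ISO YYYY-MM-DD values: lexicographic order equals chronological order.
db.execute("DELETE FROM files")
db.executemany("INSERT INTO files VALUES (?)", [("2023-12-10",), ("2024-01-09",)])
print(db.execute("SELECT performance_date FROM files "
                 "WHERE performance_date >= '2024-01-09'").fetchall())
# -> [('2024-01-09',)]
```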

diff --git a/templates/base.html b/templates/base.html
index a104459..c401eab 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -7,6 +7,7 @@
     {% block title %}Meine Links{% endblock %}
+    [not recoverable: the HTML markup of this hunk, including the single added line, was stripped during extraction]

diff --git a/templates/search.html b/templates/search.html
index fde8869..40787de 100644
--- a/templates/search.html
+++ b/templates/search.html
@@ -28,6 +28,7 @@
+    [not recoverable: the markup of the single added line was stripped during extraction]
@@ -50,70 +51,116 @@
[not recoverable: the HTML markup of the reworked search form was stripped during extraction. The surviving fragments show the {{ title_long }} heading plus the "Suche" (search) and "Suchoptionen" (search options) sections being restructured, with new inputs and controls added — consistent with the datefrom/dateto form fields read by searchcommand() in search.py.]
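For completeness, the new form fields arrive in `searchcommand()` as ordinary POST form data. The sketch below is a rough illustration only: the route path and host are assumptions (the URL mapping is not shown in this diff), and a real deployment would also need the login session that populates `session['folders']`.

```python
import requests

# Hypothetical endpoint: the route mapped to searchcommand() is not shown in this
# diff, so both the base URL and the path are assumptions.
SEARCH_URL = "http://localhost:5000/searchcommand"

form_data = {
    "query": "rehearsal recording",
    "category": "",
    "folder": "",               # empty -> all basefolders allowed for the session
    "datefrom": "2023-01-01",   # ISO YYYY-MM-DD, matching the stored format
    "dateto": "2023-12-31",
    "includeTranscript": "on",  # searchcommand() accepts "true" or "on"
}

# A real request would also carry the session cookie that provides session['folders'].
response = requests.post(SEARCH_URL, data=form_data)
print(response.json()["results"][:3])
```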