import os
import json
import sqlite3

SEARCH_DB_NAME = 'search.db'
search_db = sqlite3.connect(SEARCH_DB_NAME, check_same_thread=False)
search_db.row_factory = sqlite3.Row


def init_db():
    """Initializes the database with the required schema."""
    cursor = search_db.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            relative_path TEXT,
            filename TEXT,
            filetype TEXT,
            transcript TEXT,
            UNIQUE(relative_path, filename)
        )
    ''')
    search_db.commit()


def scan_dir(directory):
    """Recursively scan a directory using os.scandir for improved performance."""
    try:
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_dir(follow_symlinks=False):
                    # Skip transcription directories immediately.
                    if entry.name.lower() == "transkription":
                        continue
                    yield from scan_dir(entry.path)
                elif entry.is_file(follow_symlinks=False):
                    yield entry
    except PermissionError:
        # Silently skip directories we are not allowed to read.
        return


def updatefileindex():
    """Synchronizes the files table with the folders listed in folder_config.json.

    The config file is expected to be a JSON array of objects, each carrying a
    "folders" list whose entries hold a "foldername" (display name used as the
    stored path prefix) and a "folderpath" (location on disk).
    """
    cursor = search_db.cursor()
    # Load folder configuration from JSON file.
    with open("folder_config.json", "r", encoding="utf-8") as f:
        config_data = json.load(f)
    # Process each configured base folder.
    for config in config_data:
        for folder in config.get("folders", []):
            foldername = folder.get("foldername")
            raw_folderpath = folder.get("folderpath")
            if not foldername or not raw_folderpath:
                continue  # Skip incomplete config entries.
            norm_folderpath = os.path.normpath(raw_folderpath)
            # Precompute the length of the base folder path (plus one for the separator).
            base_len = len(norm_folderpath) + 1
            # Accumulate scanned file data and keys for this base folder.
            scanned_files = []  # Each entry: (relative_path, filename, filetype, transcript)
            current_keys = set()
            for entry in scan_dir(norm_folderpath):
                entry_path = os.path.normpath(entry.path)
                # Get the relative part by slicing if possible.
                if entry_path.startswith(norm_folderpath):
                    rel_part = entry_path[base_len:]
                else:
                    rel_part = os.path.relpath(entry_path, norm_folderpath)
                # Prepend the foldername so it becomes part of the stored relative path.
                relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
                print(relative_path)
                filetype = os.path.splitext(entry.name)[1].lower()
                transcript = None
                # Check for a corresponding transcript file in a sibling "Transkription" folder.
                parent_dir = os.path.dirname(entry_path)
                transcript_dir = os.path.join(parent_dir, "Transkription")
                transcript_filename = os.path.splitext(entry.name)[0] + ".md"
                transcript_path = os.path.join(transcript_dir, transcript_filename)
                if os.path.isfile(transcript_path):
                    try:
                        with open(transcript_path, 'r', encoding='utf-8') as tf:
                            transcript = tf.read()
                    except (OSError, UnicodeDecodeError):
                        transcript = None
                scanned_files.append((relative_path, entry.name, filetype, transcript))
                current_keys.add((relative_path, entry.name))
            # Remove database entries for files under this base folder that are no
            # longer on disk. Stored paths always use '/' as the separator, so the
            # LIKE pattern must use '/' as well (os.sep would break on Windows).
            pattern = foldername + '/%'
            cursor.execute("SELECT relative_path, filename FROM files WHERE relative_path LIKE ?", (pattern,))
            keys_in_db = {(row["relative_path"], row["filename"]) for row in cursor.fetchall()}
            keys_to_delete = keys_in_db - current_keys
            cursor.executemany(
                "DELETE FROM files WHERE relative_path = ? AND filename = ?",
                keys_to_delete
            )
            # Bulk write the scanned files using INSERT OR REPLACE.
            cursor.executemany(
                "INSERT OR REPLACE INTO files (relative_path, filename, filetype, transcript) VALUES (?, ?, ?, ?)",
                scanned_files
            )
            # Commit changes after processing this base folder.
            search_db.commit()
    return "File index updated successfully"
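
# A minimal sketch of how the finished index might be queried; this helper is
# illustrative rather than part of the indexing workflow above, and the name
# search_transcripts is an assumption. It relies only on the files table
# created in init_db(); SQLite's LIKE is case-insensitive for ASCII by default.
def search_transcripts(term):
    """Return (relative_path, filename) rows whose transcript contains term."""
    cursor = search_db.cursor()
    cursor.execute(
        "SELECT relative_path, filename FROM files WHERE transcript LIKE ?",
        ('%' + term + '%',)
    )
    return cursor.fetchall()
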
if __name__ == "__main__":
    init_db()            # Initialize the database schema if it doesn't exist
    updatefileindex()    # Update the file index
    search_db.close()    # Close the database connection
    print("Database connection closed.")
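
# Illustrative follow-up (not part of the script): after a run, the index can
# be inspected with the sqlite3 command-line shell, e.g.:
#   sqlite3 search.db "SELECT relative_path, filetype FROM files LIMIT 5;"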