diff --git a/example_config_files/.env b/example_config_files/.env new file mode 100644 index 0000000..0d510b7 --- /dev/null +++ b/example_config_files/.env @@ -0,0 +1,2 @@ +CONTAINER_NAME=bethaus-app +HOST_RULE=Host(`your-app-domain.de`) diff --git a/example_config_files/app_config.json b/example_config_files/app_config.json new file mode 100644 index 0000000..d2a6010 --- /dev/null +++ b/example_config_files/app_config.json @@ -0,0 +1,12 @@ +{ + "SECRET_KEY": "THIS_IS_USED_TO_CREATE_SESSIONS", + "SALT": "THIS_IS_USED_TO_CREATE_TOKENS", + "ADMIN_KEY": "THIS_IS_USED_TO_AUTHENTICATE_ADMIN", + "TITLE_SHORT": "Gottesdienste", + "TITLE_LONG": "Gottesdienste App", + "BASE_DIR": "/mnt", + "filecache_size_limit_audio": 16, + "filecache_size_limit_image": 16, + "filecache_size_limit_video": 16, + "filecache_size_limit_other": 16 +} \ No newline at end of file diff --git a/example_config_files/folder_mount_config.json b/example_config_files/folder_mount_config.json new file mode 100644 index 0000000..8ef977c --- /dev/null +++ b/example_config_files/folder_mount_config.json @@ -0,0 +1,37 @@ +{ + "SERVER1": { + "SSH_USER": "root", + "SSH_SERVER": "your-first-site.de", + "SSH_SERVER_PORT": 1122, + "REMOTE_NFS_PORT": 2049, + "LOCAL_PORT_BASE": 2022, + "MOUNT_POINTS": [ + "/mnt/Gottesdienste", + "/mnt/Jugendgottesdienste", + "/mnt/Liedersammlung", + "/mnt/Hochzeiten" + ], + "NFS_SHARES": [ + "/volume2/Aufnahme-stereo/010 Gottesdienste", + "/volume2/Jugend/Gottesdienste Archiv", + "/volume2/Aufnahme-stereo/014 Liedersammlung", + "/volume2/Aufnahme-stereo/021 Hochzeiten" + ] + }, + "SERVER2": { + "SSH_USER": "root", + "SSH_SERVER": "your-second-site.de", + "SSH_SERVER_PORT": 1122, + "REMOTE_NFS_PORT": 2049, + "LOCAL_PORT_BASE": 3022, + "MOUNT_POINTS": [ + "/mnt/Gottesdienste", + "/mnt/Hochzeiten" + ], + "NFS_SHARES": [ + "/volume1/Aufnahme/010 Gottesdienste", + "/volume1/Aufnahme/020 Hochzeiten" + ] + } + } + \ No newline at end of file diff --git 
a/example_config_files/folder_secret_config.json b/example_config_files/folder_secret_config.json new file mode 100644 index 0000000..8acf62e --- /dev/null +++ b/example_config_files/folder_secret_config.json @@ -0,0 +1,26 @@ +[ + { + "secret": "Up741PkSVTTbXsDjd0OVjfbBmM3ggcYV", + "validity": "31.07.2025", + "folders": [ + { + "foldername": "Gottesdienste", + "folderpath": "/mnt/Gottesdienste" + }, + { + "foldername": "Jugendgottesdienste", + "folderpath": "/mnt/Jugendgottesdienste" + } + ] + }, + { + "secret": "PstSQSkfVT6r2CScEWyupqVAfVCDR6aq", + "validity": "21.08.2025", + "folders": [ + { + "foldername": "Missionskonferenz", + "folderpath": "/mnt/Missionskonferenz" + } + ] + } +] \ No newline at end of file diff --git a/example_config_files/transcription_config.yml b/example_config_files/transcription_config.yml new file mode 100644 index 0000000..884e77d --- /dev/null +++ b/example_config_files/transcription_config.yml @@ -0,0 +1,7 @@ +model_name: "medium" +gpu_only: false + +folder_list: + # Windows path example + - "\\\\10.1.0.11\\Aufnahme-stereo\\010 Gottesdienste ARCHIV" + - "\\\\10.1.0.11\\Jugend\\Gottesdienste Archiv" diff --git a/helperfunctions.py b/helperfunctions.py index f39356f..7ad94b1 100644 --- a/helperfunctions.py +++ b/helperfunctions.py @@ -4,6 +4,10 @@ import os import sqlite3 from datetime import datetime, timedelta from typing import Optional +import auth + +app_config = auth.return_app_config() +BASE_DIR = os.path.realpath(app_config['BASE_DIR']) log_db = sqlite3.connect("access_log.db", check_same_thread=False) @@ -143,15 +147,15 @@ def generate_top_list(category): cursor = log_db.execute(query, params_for_filter) rows = cursor.fetchall() - # Filter by allowed basefolders + # Filter by allowed base folders allowed_basefolders = list(session['folders'].keys()) rows = [ (rel_path, access_count) for rel_path, access_count in rows if any(rel_path.startswith(folder) for folder in allowed_basefolders) ] - # Convert rows to a list of 
dictionaries and add category - rows = [ + # Convert rows to a list of dicts and add category + records = [ { 'rel_path': rel_path, 'access_count': access_count, @@ -159,15 +163,18 @@ def generate_top_list(category): } for rel_path, access_count in rows ] - rows = [r for r in rows if r['category'] == category][:20] + # Filter by requested category and limit + records = [r for r in records if r['category'] == category][:20] - filelist = [ - { - 'name': rel_path.split('/')[-1], - 'path': rel_path, - 'file_type': 'music' - } - for rel_path in [r['rel_path'] for r in rows] - ] + # Build file list and check existence + filelist = [] + for record in records: + rel_path = record['rel_path'] + if os.path.exists(os.path.join(BASE_DIR, rel_path)): # ensure file exists on disk // slow operation. maybe improve later + filelist.append({ + 'name': os.path.basename(rel_path), + 'path': rel_path, + 'file_type': 'music' + }) return filelist diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..fb8a0bb --- /dev/null +++ b/readme.md @@ -0,0 +1,145 @@ +# Media Sharing App + +This is a self-hosted media sharing and indexing platform designed for secure and flexible file sharing. Originally created to serve specific community needs, it is now made available for general use under an open-source license. The application supports file access via time-limited secret links, metadata indexing, and transcription-based search. + +## Features + +- **Secret Link Sharing** + Share folders via unique URLs with expiration dates. Two levels of sharing: + - **Primary Folders (Secret Links):** Centralized configuration with full admin control. Suitable for public events. + - **Subfolders (Token Links):** Can be shared ad hoc, without central admin access. Ideal for limited-audience events. 
+ +- **Transcription and Search** + - Local transcription of audio files + - Full-text search through transcripts + - Daily indexing for search via cron job required + +- **Caching and Performance** + - Data is fetched from a remote network storage via SSH + - Server side caching for fast repeat access + - Automatic cache invalidation for unused files + +- **Privacy-Aware Analytics** + - Anonymous access logging (no IP address storage) + +## Requirements + +- **Domain/Subdomain** for app access +- **Dynamic DNS** for local network tunnel access +- **Local Network Storage** (Linux server or NAS with SSH and NFS support) +- **Virtual Private Server (VPS)** with: + - Docker + - SSH + - NFS + - Cron + +## System Architecture + +```plaintext + ┌───────────────────────────┐ + │ Web Browser │ + │ (User / Admin Interface) │ + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ HTTPS (Secret Link) │ + │ (Traefik) │ + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ Flask App (VPS) │ + │ Routing Layer │ + └────────────┬──────────────┘ + │ + ┌────────────▼─────────────┐ + │ │ + ┌────────▼────────┐ ┌────────▼─────────┐ + │ User Views / │ │ Admin Interface │ + │ Token Logic │ │ (Protected Link)│ + └────────┬────────┘ └────────┬─────────┘ + │ │ + ┌───────▼──────────────────────────▼──────┐ + │ Link Validation & Expiry Logic │ + └───────────────────┬─────────────────────┘ + │ + ┌─────────────▼──────────────┐ + │ Caching Layer │ + │ (Local file cache) │ + └─────────────┬──────────────┘ + │ + ┌─────────────▼──────────────┐ + │ SSH Tunnel to Storage │ + │ (NAS or Linux Server) │ + └─────────────┬──────────────┘ + │ + ┌─────────────▼──────────────┐ ┌─────────────────────────────┐ + │ Local File System │◄─────┤ Media File Transcriber (PC) │ + └─────────────┬──────────────┘ └─────────────────────────────┘ + │ + ┌─────────────▼──────────────┐ + │ Daily Cron Service │ + │ - Index Filesystem │ + │ - Index Transcription │ + 
└─────────────┬──────────────┘ + │ + ┌─────────▼──────────┐ + │ Search Engine │ + └────────────────────┘ +``` + +- Files are served over an SSH tunnel from NAS to the VPS. +- Caching and indexing are performed on the VPS for performance. +- Admin and user features are activated via special access links. + +## Setup Instructions + +### 1. Clone the Repository to your VPS + +```bash +git clone https://gitea.centx.de/lelo/bethaus-app +cd bethaus-app +``` + +### 2. Configure Your Environment + +Copy example config files and customize them for your setup: + +```bash +cd bethaus-app +cd example_config_files +cp -a . ../ +``` + +### 3. Change Configurations + +- `.env` + Used by Docker Compose to manage environment variables. + +- `app_config.json` + General settings including search, link expiration, and admin key. + **Important:** Replace default keys with strong random strings and keep them secret. + +- `folder_mount_config.json` + Used if you set up `cron` to auto-mount folders. + +- `folder_secret_config.json` + Contains paths and secrets to the shared folders. + Add your first entry manually, then use the admin UI for further setup. + +- `transcription_config.yml` + Required for local transcription. Adjust based on your tools and resources. + +### 4. Launch the App + +```bash +docker compose up -d +``` + +### 5. Admin Access + +To unlock administrative controls on your device, use the dedicated admin access link. + +## Contribution + +This app is under active development and contributions are welcome. +If you'd like to adapt it for your own community or project, feel free to fork the repo and reach out for setup help. 
diff --git a/requirements_transcription.txt b/requirements_transcription.txt index bb2ef44..3642026 100644 --- a/requirements_transcription.txt +++ b/requirements_transcription.txt @@ -1,5 +1,8 @@ +pyaml +librosa openai-whisper #https://pytorch.org/get-started/locally/ -torch==2.5.1 -torchvision==0.20.1 -torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124 \ No newline at end of file +--extra-index-url https://download.pytorch.org/whl/cu128 +torch +torchvision +torchaudio \ No newline at end of file diff --git a/transcribe_all.py b/transcribe_all.py index e8df9af..dd01b36 100755 --- a/transcribe_all.py +++ b/transcribe_all.py @@ -19,6 +19,23 @@ with open("transcription_config.yml", "r", encoding="utf-8") as file: settings = yaml.safe_load(file) folder_list = settings.get("folder_list") model_name = settings.get("model_name") + gpu_only = settings.get("gpu_only", False) + +print("PyTorch version:", torch.__version__) +print("CUDA available?", torch.cuda.is_available()) +print("CUDA version:", torch.version.cuda) +print("GPU count:", torch.cuda.device_count()) +if torch.cuda.is_available(): + for i in range(torch.cuda.device_count()): + print(f" Device {i}:", torch.cuda.get_device_name(i)) + +if not folder_list or not model_name: + print("Error: Please check the transcription_config.yml file. It should contain 'folder_list' and 'model_name'.") + sys.exit(1) + +if gpu_only and not torch.cuda.is_available(): + print("Error: You requested to only use GPU but it is not available. Please check your PyTorch installation.") + sys.exit(1) def load_audio_librosa(path: str, sr: int = 16_000) -> np.ndarray: audio, orig_sr = librosa.load(path, sr=sr) # load + resample to 16 kHz