remove crawl function

access directly from disk
2025-04-18 20:42:08 +00:00 · 2025-04-18 20:39:30 +00:00
2 changed files with 144 additions and 117 deletions
--- a/app.py
+++ b/app.py
@ -236,96 +236,115 @@ def api_browse(subpath):
@app.route("/media/<path:subpath>")
@auth.require_secret
-def serve_file(subpath):               
+def serve_file(subpath):
    # ─── 1) Locate the real file on disk ───
    root, *relative_parts = subpath.split('/')
-    base_path = session['folders'][root]
+    base_path = session['folders'].get(root)
-    full_path = os.path.join(base_path, *relative_parts)
+    full_path = os.path.join(base_path or '', *relative_parts)
-    
+
    if not os.path.isfile(full_path):
        app.logger.error(f"File not found: {full_path}")
        return "File not found", 404
    # ─── 2) Prep request info ───
    mime, _ = mimetypes.guess_type(full_path)
    mime = mime or 'application/octet-stream'
    range_header = request.headers.get('Range')
    ip_address = request.remote_addr
    user_agent = request.headers.get('User-Agent')
    is_cache_request = request.headers.get('X-Cache-Request') == 'true'
-    # Check cache first (using diskcache)
+    is_cache_request = request.headers.get('X-Cache-Request') == 'true'
-    response = None
+    ip_address       = request.remote_addr
-    
+    user_agent       = request.headers.get('User-Agent')
-    # determine the cache to use based on the file type
+
-    if mime and mime.startswith('audio/'):
+    # skip logging on cache hits or on audio GETs (per your rules)
-        cache = cache_audio
+    do_log = not is_cache_request
-    elif mime and mime.startswith('image/'):
+    if mime == 'audio/mpeg' and request.method != 'HEAD':
-        cache = cache_image
+        do_log = False
-    elif mime and mime.startswith('video/'):
+
-        cache = cache_video
+    # ─── 3) Pick the right cache ───
-    else:
+    if   mime.startswith('audio/'): cache = cache_audio
-        cache = cache_other
+    elif mime.startswith('image/'): cache = cache_image
-        
+    elif mime.startswith('video/'): cache = cache_video
-    # Check if the file is already cached
+    else:                          cache = cache_other
-    if is_cache_request:
+
-        logging = False
+    # ─── 4) Try to stream directly from diskcache ───
-    else:
+    try:
-        logging = True
+        # returns a file-like whose .name is the real path on disk
-        
+        with cache.read(subpath) as reader:
-    cached = cache.get(subpath)
+            file_path = reader.name
-    if cached:
+            filesize  = os.path.getsize(file_path)
-        cached_file_bytes, mime = cached
+            response  = send_file(
-        cached_file = io.BytesIO(cached_file_bytes)
+                file_path,
-        filesize = len(cached_file.getbuffer())
+                mimetype= mime,
-        response = send_file(cached_file, mimetype=mime)
+                conditional= True   # enable Range / If-Modified / etc
-    else:
+            )
-        if mime and mime.startswith('image/'):
+            cached_hit = True
-            # Image processing branch (with caching)
+
    except KeyError:
        # cache miss → generate & write back to cache
        cached_hit = False
        if mime.startswith('image/'):
            # ─── 4a) Image branch: thumbnail & cache the JPEG ───
            try:
                with Image.open(full_path) as img:
                    img.thumbnail((1920, 1920))
                    if img.mode in ("RGBA", "P"):
                        img = img.convert("RGB")
                    output_format = 'JPEG'
                    output_mime = 'image/jpeg'
                    save_kwargs = {'quality': 85}
-                    img_bytes_io = io.BytesIO()
+                    thumb_io = io.BytesIO()
-                    img.save(img_bytes_io, format=output_format, **save_kwargs)
+                    img.save(thumb_io, format='JPEG', quality=85)
-                    thumb_bytes = img_bytes_io.getvalue()
+                    thumb_io.seek(0)
-                    filesize = len(thumb_bytes)
+
-                    cache.set(subpath, (thumb_bytes, output_mime))
+                    # write thumbnail into diskcache as a real file
-                    response = send_file(io.BytesIO(thumb_bytes), mimetype=output_mime, conditional=True)
+                    cache.set(subpath, thumb_io, read=True)
                # now re-open from cache to get the on-disk path
                with cache.read(subpath) as reader:
                    file_path = reader.name
                    filesize  = os.path.getsize(file_path)
                    response  = send_file(
                        file_path,
                        mimetype= 'image/jpeg',
                        conditional= True
                    )
            except Exception as e:
                app.logger.error(f"Image processing failed for {subpath}: {e}")
                abort(500)
        else:
-            # Cache non-image files: read bytes and cache
+            # ─── 4b) Non-image branch: cache original file ───
            try:
-                with open(full_path, 'rb') as f:
+                # store the real file on diskcache
-                    file_bytes = f.read()
+                cache.set(subpath, open(full_path, 'rb'), read=True)
-                cache.set(subpath, (file_bytes, mime))
+
-                file_bytes_io = io.BytesIO(file_bytes)
+                # read back to get its path
-                filesize = len(file_bytes_io.getbuffer())
+                with cache.read(subpath) as reader:
-                response = send_file(file_bytes_io, mimetype=mime, conditional=True)
+                    file_path = reader.name
                    filesize  = os.path.getsize(file_path)
                    response  = send_file(
                        file_path,
                        mimetype= mime,
                        conditional= True
                    )
            except Exception as e:
-                app.logger.error(f"Failed to read file {subpath}: {e}")
+                app.logger.error(f"Failed to cache file {subpath}: {e}")
                abort(500)
-    # Set Cache-Control header (browser caching for 1 day)
+    # ─── 5) Common headers & logging ───
    response.headers['Cache-Control'] = 'public, max-age=86400'
-    # special rules for audio files.
+    if do_log:
-    # HEAD request checks if the audio file is available. GET requests come from the audio player itself and can be made multiple times.
+        a.log_file_access(
-    # a HEAD request only for logging will be ignored because of rules before
+            subpath, filesize, mime,
-    if mime and mime.startswith('audio/mpeg') and request.method != 'HEAD': 
+            ip_address, user_agent,
-        logging = False
+            session['device_id'], cached_hit
-
+        )
    if logging:
        a.log_file_access(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
    return response
@app.route("/transcript/<path:subpath>")
@auth.require_secret
 def get_transcript(subpath):
@ -341,61 +360,6 @@ def get_transcript(subpath):
        content = f.read()
    return content, 200, {'Content-Type': 'text/markdown; charset=utf-8'}
@app.route("/crawl/<path:subpath>")
@auth.require_secret
 def crawl_and_cache(subpath):
    """
    Walk a directory tree and cache every file that is not cached yet.

    `subpath` has the form "<root>/<relative path>"; <root> is looked up in
    session['folders'] to obtain the base directory on disk.  Image files
    are reduced to a thumbnail of at most 1200x1200 and cached as PNG
    bytes; every other file is cached as its raw bytes.  Each cache entry is
    a (bytes, mime) tuple keyed by the file's full path on disk.

    A failure on a single file is logged and does not stop the crawl.

    Returns:
        A (body, status, headers) tuple: a JSON document listing the paths
        cached by this call, status 200 and a JSON content type.
    """
    root, *relative_parts = subpath.split('/')
    base_path = session['folders'][root]
    full_path = os.path.join(base_path, *relative_parts)
    cached_files = []  # full paths of the files cached by this call
    # NOTE(review): `cache` is not defined in the part of the file visible
    # here — confirm which cache object this is meant to be.
    # `dirpath` (not `root`) so the folder key above is not shadowed.
    for dirpath, _dirs, files in os.walk(full_path):
        for filename in files:
            full_path_file = os.path.join(dirpath, filename)
            # Skip if this file is already in the cache
            if cache.get(full_path_file):
                continue
            # Guess the type from the file itself, not from the crawled
            # directory (which has no meaningful extension).
            mime, _ = mimetypes.guess_type(full_path_file)
            mime = mime or 'application/octet-stream'
            if mime.startswith('image/'):
                try:
                    with Image.open(full_path_file) as img:
                        # Create a thumbnail (max 1200x1200)
                        img.thumbnail((1200, 1200))
                        img_bytes_io = io.BytesIO()
                        # PNG has no `quality` save option, so none is passed.
                        img.save(img_bytes_io, format='PNG')
                    # The stored bytes are PNG, so record that type rather
                    # than the type of the original image.
                    cache.set(full_path_file, (img_bytes_io.getvalue(), 'image/png'))
                    cached_files.append(full_path_file)
                except Exception as e:
                    app.logger.error(f"Image processing failed for {full_path_file}: {e}")
            else:
                try:
                    with open(full_path_file, 'rb') as f:
                        file_bytes = f.read()
                    cache.set(full_path_file, (file_bytes, mime))
                    cached_files.append(full_path_file)
                except Exception as e:
                    app.logger.error(f"Failed to read file {full_path_file}: {e}")
    # Return the list of cached files as a JSON response
    body = json.dumps({"cached_files": cached_files}, indent=4)
    return body, 200, {'Content-Type': 'application/json'}
 def query_recent_connections():
    global clients_connected, background_thread_running
    background_thread_running = True
--- a/migrate_cache.py
+++ b/migrate_cache.py
@ -0,0 +1,63 @@
 #!/usr/bin/env python3
 """
 migrate_cache.py
 Migrate DiskCache caches from “old” in‐memory entries (bytes, mime) into
 “new” on‐disk read=True entries so that Flask can send them via send_file(path).
 Usage:
    python migrate_cache.py /path/to/filecache_audio \
                           /path/to/filecache_image \
                           /path/to/filecache_video \
                           /path/to/filecache_other
 """
 import io
 import sys
 from diskcache import Cache
 def migrate_cache(cache_path):
    """
    Convert legacy entries of the DiskCache at `cache_path` to file-backed ones.

    Every key is visited once.  A value in the old format — a 2-tuple of
    (bytes-like payload, mime string) — is stored again under the same key
    with ``read=True`` so that the payload is written as a file.  The mime
    string is not stored with the new entry.  Any other value is left
    untouched and counted as skipped.

    A failure while reading or rewriting a single key is printed, counted,
    and the run continues with the next key.  A summary line is printed at
    the end.

    Args:
        cache_path: Directory of the DiskCache to migrate.
    """
    print(f"➡ Migrating cache at {cache_path!r}")
    migrated = 0
    skipped  = 0
    failed   = 0
    # The context manager closes the cache even if the loop raises.
    with Cache(cache_path) as cache:
        # Iterate keys without loading everything into memory
        for key in cache.iterkeys():
            try:
                val = cache.get(key)
            except Exception as e:
                print(f"  [ERROR] key={key!r} get failed: {e}")
                failed += 1
                continue
            # Detect old‐style entries: (bytes, mime)
            is_legacy = (
                isinstance(val, tuple)
                and len(val) == 2
                and isinstance(val[0], (bytes, bytearray))
                and isinstance(val[1], str)
            )
            if not is_legacy:
                skipped += 1
                continue
            try:
                # Re‐store as an on‐disk file; a fresh BytesIO already
                # starts at position 0, so no seek is needed.
                cache.set(key, io.BytesIO(val[0]), read=True)
            except Exception as e:
                # One unwritable entry must not abort the whole migration.
                print(f"  [ERROR] key={key!r} set failed: {e}")
                failed += 1
                continue
            migrated += 1
    print(f"   → Done: migrated={migrated}, skipped={skipped}, failed={failed}\n")
 if __name__ == "__main__":
    # Every command-line argument after the script name is one cache directory.
    cache_dirs = sys.argv[1:]
    if not cache_dirs:
        print("Usage: migrate_cache.py <cache_dir1> [<cache_dir2> ...]")
        sys.exit(1)
    # Migrate the directories one after another, in the order given.
    for cache_dir in cache_dirs:
        migrate_cache(cache_dir)
Author	SHA1	Message	Date
lelo	a7080dd767	remove crawl function	2025-04-18 20:42:08 +00:00
lelo	7c2b8e177f	access directly from disk	2025-04-18 20:39:30 +00:00