access directly from disk

This commit is contained in:
lelo 2025-04-18 20:39:30 +00:00
parent bec7f593fa
commit 7c2b8e177f
2 changed files with 144 additions and 62 deletions

143
app.py
View File

@ -236,96 +236,115 @@ def api_browse(subpath):
def _send_cached_file(cache, key, mimetype):
    """Build a response for the DiskCache entry stored under `key`.

    Args:
        cache: DiskCache instance holding the entry as an on-disk file.
        key: Cache key (the request subpath).
        mimetype: Content type to announce for the cached bytes.

    Returns:
        A ``(response, filesize)`` tuple.

    Raises:
        KeyError: If `key` is not in the cache.
    """
    # cache.read() hands back a file handle whose .name is the entry's real
    # path on disk; only the path is needed, so the handle is closed at once.
    with cache.read(key) as reader:
        file_path = reader.name
    filesize = os.path.getsize(file_path)
    # conditional=True enables Range / If-Modified-Since handling.
    response = send_file(file_path, mimetype=mimetype, conditional=True)
    return response, filesize


@app.route("/media/<path:subpath>")
@auth.require_secret
def serve_file(subpath):
    """Serve a media file, streaming it from the per-type DiskCache.

    The first path segment of `subpath` selects a folder from
    ``session['folders']``; the remaining segments are the path inside it.
    Images are reduced to a JPEG thumbnail (max 1920x1920) before caching;
    every other file is cached unchanged.

    Returns:
        The file response, or ``("File not found", 404)`` when the folder is
        unknown, the path leaves the folder, or the file does not exist.
        Aborts with 500 when generating or caching the file fails.
    """
    # ─── 1) Locate the real file on disk ───
    root, *relative_parts = subpath.split('/')
    base_path = session['folders'].get(root)
    if not base_path:
        # Joining onto '' would resolve the request against the working
        # directory, so an unknown root must be rejected outright.
        app.logger.error(f"File not found: {subpath}")
        return "File not found", 404

    full_path = os.path.join(base_path, *relative_parts)

    # Reject '..' segments that would climb out of the configured folder.
    base_prefix = os.path.join(os.path.abspath(base_path), '')
    inside_base = os.path.abspath(full_path).startswith(base_prefix)
    if not inside_base or not os.path.isfile(full_path):
        app.logger.error(f"File not found: {full_path}")
        return "File not found", 404

    # ─── 2) Prep request info ───
    mime, _ = mimetypes.guess_type(full_path)
    mime = mime or 'application/octet-stream'
    is_cache_request = request.headers.get('X-Cache-Request') == 'true'
    ip_address = request.remote_addr
    user_agent = request.headers.get('User-Agent')

    # Skip logging for cache-warming requests and for audio requests other
    # than HEAD (only the HEAD request of an audio file is logged).
    do_log = not is_cache_request
    if mime == 'audio/mpeg' and request.method != 'HEAD':
        do_log = False

    # ─── 3) Pick the right cache ───
    is_image = mime.startswith('image/')
    if mime.startswith('audio/'):
        cache = cache_audio
    elif is_image:
        cache = cache_image
    elif mime.startswith('video/'):
        cache = cache_video
    else:
        cache = cache_other

    # Image entries are always stored as JPEG thumbnails, so that is the
    # type to announce regardless of the source file's extension.
    served_mime = 'image/jpeg' if is_image else mime

    # ─── 4) Try to stream directly from diskcache ───
    try:
        response, filesize = _send_cached_file(cache, subpath, served_mime)
        cached_hit = True
    except KeyError:
        # cache miss → generate & write back to cache
        cached_hit = False
        if is_image:
            # ─── 4a) Image branch: thumbnail & cache the JPEG ───
            try:
                with Image.open(full_path) as img:
                    img.thumbnail((1920, 1920))
                    if img.mode in ("RGBA", "P"):
                        img = img.convert("RGB")
                    thumb_io = io.BytesIO()
                    img.save(thumb_io, format='JPEG', quality=85)
                    thumb_io.seek(0)
                # read=True makes diskcache store the stream as a real file
                cache.set(subpath, thumb_io, read=True)
                response, filesize = _send_cached_file(cache, subpath, served_mime)
            except Exception as e:
                app.logger.error(f"Image processing failed for {subpath}: {e}")
                abort(500)
        else:
            # ─── 4b) Non-image branch: cache original file ───
            try:
                # Close the source handle as soon as diskcache has copied it.
                with open(full_path, 'rb') as source:
                    cache.set(subpath, source, read=True)
                response, filesize = _send_cached_file(cache, subpath, served_mime)
            except Exception as e:
                app.logger.error(f"Failed to cache file {subpath}: {e}")
                abort(500)

    # ─── 5) Common headers & logging ───
    # Browser caching for one day.
    response.headers['Cache-Control'] = 'public, max-age=86400'

    if do_log:
        a.log_file_access(
            subpath, filesize, mime,
            ip_address, user_agent,
            session['device_id'], cached_hit
        )
    return response
@app.route("/transcript/<path:subpath>") @app.route("/transcript/<path:subpath>")
@auth.require_secret @auth.require_secret
def get_transcript(subpath): def get_transcript(subpath):

63
migrate_cache.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
migrate_cache.py
Migrate DiskCache caches from old in-memory entries (bytes, mime) into
new on-disk read=True entries so that Flask can send them via send_file(path).
Usage:
python migrate_cache.py /path/to/filecache_audio \
/path/to/filecache_image \
/path/to/filecache_video \
/path/to/filecache_other
"""
import io
import sys
from diskcache import Cache
def migrate_cache(cache_path):
    """
    Rewrite old-style entries of the DiskCache at `cache_path` as on-disk files.

    Walks every key in the cache. If the value is a tuple of (bytes, mime),
    the bytes are stored again under the same key with read=True so DiskCache
    keeps them as a file; the mime string is not stored. Any other value is
    left untouched and counted as skipped. A key whose read or rewrite fails
    is reported and the migration continues with the next key.
    """
    print(f"➡ Migrating cache at {cache_path!r}")
    migrated = 0
    skipped = 0
    failed = 0

    # The context manager closes the cache even if the loop raises.
    with Cache(cache_path) as cache:
        # Iterate keys without loading everything into memory
        for key in cache.iterkeys():
            try:
                val = cache.get(key)
            except Exception as e:
                print(f" [ERROR] key={key!r} get failed: {e}")
                failed += 1
                continue

            # Detect old-style entries: (bytes, mime)
            is_old_style = (
                isinstance(val, tuple)
                and len(val) == 2
                and isinstance(val[0], (bytes, bytearray))
                and isinstance(val[1], str)
            )
            if not is_old_style:
                skipped += 1
                continue

            # Re-store as an on-disk file
            try:
                cache.set(key, io.BytesIO(val[0]), read=True)
            except Exception as e:
                print(f" [ERROR] key={key!r} set failed: {e}")
                failed += 1
                continue
            migrated += 1

    if failed:
        print(f" [WARN] {failed} entries could not be migrated")
    print(f" → Done: migrated={migrated}, skipped={skipped}\n")
if __name__ == "__main__":
    # Every command-line argument is one cache directory to migrate.
    cache_dirs = sys.argv[1:]
    if not cache_dirs:
        print("Usage: migrate_cache.py <cache_dir1> [<cache_dir2> ...]")
        sys.exit(1)

    for cache_dir in cache_dirs:
        migrate_cache(cache_dir)