access directly from disk

This commit is contained in:
lelo 2025-04-18 20:39:30 +00:00
parent bec7f593fa
commit 7c2b8e177f
2 changed files with 144 additions and 62 deletions

143
app.py
View File

@ -236,96 +236,115 @@ def api_browse(subpath):
def _send_cached_entry(cache, key, mimetype):
    """Build a response that serves the cache entry *key* from its file on disk.

    Args:
        cache: the diskcache instance holding the entry (stored with
            ``read=True``, i.e. as a real file).
        key: cache key of the entry.
        mimetype: Content-Type to put on the response.

    Returns:
        ``(response, filesize)`` — the Flask response and the size in bytes
        of the cached file.

    Raises:
        KeyError: the key is not in the cache (callers treat this as a miss).
    """
    # cache.read() hands back a file-like whose .name is the real path on
    # disk; only the path is needed, so the handle is closed right away.
    with cache.read(key) as reader:
        file_path = reader.name
    filesize = os.path.getsize(file_path)
    response = send_file(
        file_path,
        mimetype=mimetype,
        conditional=True,  # enable Range / If-Modified / etc
    )
    return response, filesize


@app.route("/media/<path:subpath>")
@auth.require_secret
def serve_file(subpath):
    """Serve a media file through the per-type disk cache.

    The first path segment of *subpath* selects a folder from
    ``session['folders']``; the remaining segments are the path inside it.
    Images are served as JPEG thumbnails (max 1920x1920, quality 85); all
    other files are served unchanged. Responses are streamed from the cache's
    on-disk file so that Range and conditional requests work.

    Args:
        subpath: ``<root>/<relative path>`` taken from the URL.

    Returns:
        The Flask response for the file, or ``("File not found", 404)`` when
        the root is unknown, the path escapes the root folder, or the file
        does not exist. Aborts with 500 when caching or thumbnailing fails.
    """
    # ─── 1) Locate the real file on disk ───
    root, *relative_parts = subpath.split('/')
    base_path = session['folders'].get(root)
    if base_path is None:
        # Unknown root: never fall back to a path relative to the working
        # directory, which would expose arbitrary files of the process.
        app.logger.error(f"File not found: {subpath}")
        return "File not found", 404

    base_dir = os.path.abspath(base_path)
    full_path = os.path.abspath(os.path.join(base_dir, *relative_parts))
    try:
        # The URL is untrusted: reject '..' segments that leave the folder.
        inside_base = os.path.commonpath([base_dir, full_path]) == base_dir
    except ValueError:
        # commonpath() raises when the paths cannot be compared
        # (e.g. different drives on Windows).
        inside_base = False

    if not inside_base or not os.path.isfile(full_path):
        app.logger.error(f"File not found: {full_path}")
        return "File not found", 404

    # ─── 2) Prep request info ───
    mime, _ = mimetypes.guess_type(full_path)
    mime = mime or 'application/octet-stream'
    is_image = mime.startswith('image/')
    is_cache_request = request.headers.get('X-Cache-Request') == 'true'
    ip_address = request.remote_addr
    user_agent = request.headers.get('User-Agent')

    # Skip logging for cache-prefetch requests (X-Cache-Request: true) and
    # for audio GETs: the player may issue many GETs for one playback, so
    # only the HEAD request of an audio file is logged.
    do_log = not is_cache_request
    if mime == 'audio/mpeg' and request.method != 'HEAD':
        do_log = False

    # ─── 3) Pick the right cache ───
    if mime.startswith('audio/'):
        cache = cache_audio
    elif is_image:
        cache = cache_image
    elif mime.startswith('video/'):
        cache = cache_video
    else:
        cache = cache_other

    # Cached images are always JPEG thumbnails, whatever the source format,
    # so a cache hit must not advertise the original file's MIME type.
    served_mime = 'image/jpeg' if is_image else mime

    # ─── 4) Try to stream directly from diskcache ───
    try:
        response, filesize = _send_cached_entry(cache, subpath, served_mime)
        cached_hit = True
    except KeyError:
        # cache miss → generate & write back to cache
        cached_hit = False

        if is_image:
            # ─── 4a) Image branch: thumbnail & cache the JPEG ───
            try:
                with Image.open(full_path) as img:
                    img.thumbnail((1920, 1920))
                    if img.mode in ("RGBA", "P"):
                        img = img.convert("RGB")
                    thumb_io = io.BytesIO()
                    img.save(thumb_io, format='JPEG', quality=85)
                thumb_io.seek(0)

                # write thumbnail into diskcache as a real file
                cache.set(subpath, thumb_io, read=True)

                # now serve it from the cache's on-disk copy
                response, filesize = _send_cached_entry(
                    cache, subpath, served_mime
                )
            except Exception as e:
                app.logger.error(f"Image processing failed for {subpath}: {e}")
                abort(500)
        else:
            # ─── 4b) Non-image branch: cache original file ───
            try:
                # store the real file on diskcache; the source handle is
                # closed as soon as the copy is done
                with open(full_path, 'rb') as src:
                    cache.set(subpath, src, read=True)

                # serve it from the cache's on-disk copy
                response, filesize = _send_cached_entry(
                    cache, subpath, served_mime
                )
            except Exception as e:
                app.logger.error(f"Failed to cache file {subpath}: {e}")
                abort(500)

    # ─── 5) Common headers & logging ───
    # browser caching for 1 day
    response.headers['Cache-Control'] = 'public, max-age=86400'

    if do_log:
        a.log_file_access(
            subpath, filesize, mime,
            ip_address, user_agent,
            session['device_id'], cached_hit
        )
    return response
@app.route("/transcript/<path:subpath>")
@auth.require_secret
def get_transcript(subpath):

63
migrate_cache.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
migrate_cache.py
Migrate DiskCache caches from old in-memory entries (bytes, mime) into
new on-disk read=True entries so that Flask can send them via send_file(path).
Usage:
python migrate_cache.py /path/to/filecache_audio \
/path/to/filecache_image \
/path/to/filecache_video \
/path/to/filecache_other
"""
import io
import sys
from diskcache import Cache
def migrate_cache(cache_path):
    """Rewrite old-style entries of one DiskCache directory as on-disk files.

    Walks every key in the cache at `cache_path`. If the value is a tuple of
    (bytes, mime), the bytes are stored again under the same key via
    read=True so DiskCache keeps them as a file. The mime string is dropped.
    All other values are left untouched and counted as skipped. A key whose
    read or rewrite fails is reported, counted as failed, and does not stop
    the run.

    NOTE: the rewrite passes no expire/tag to cache.set(), so any such
    metadata on a migrated entry is not carried over.

    Args:
        cache_path: directory of the DiskCache cache to migrate.
    """
    print(f"➡ Migrating cache at {cache_path!r}")
    cache = Cache(cache_path)
    migrated = 0
    skipped = 0
    failed = 0

    try:
        # Iterate keys without loading everything into memory
        for key in cache.iterkeys():
            try:
                val = cache.get(key)
            except Exception as e:
                print(f" [ERROR] key={key!r} get failed: {e}")
                failed += 1
                continue

            # Detect old-style entries: (bytes, mime)
            is_old_style = (
                isinstance(val, tuple)
                and len(val) == 2
                and isinstance(val[0], (bytes, bytearray))
                and isinstance(val[1], str)
            )
            if not is_old_style:
                skipped += 1
                continue

            try:
                # Re-store as an on-disk file; a fresh BytesIO already
                # starts at position 0.
                cache.set(key, io.BytesIO(val[0]), read=True)
            except Exception as e:
                print(f" [ERROR] key={key!r} set failed: {e}")
                failed += 1
                continue
            migrated += 1
    finally:
        # Release the cache even when iteration itself blows up.
        cache.close()

    print(
        f" → Done: migrated={migrated}, skipped={skipped}, failed={failed}\n"
    )
# Script entry point: every command-line argument is a cache directory to
# migrate, processed in the order given.
if __name__ == "__main__":
    cache_dirs = sys.argv[1:]
    if not cache_dirs:
        # No directory given: show usage and exit with a non-zero status.
        print("Usage: migrate_cache.py <cache_dir1> [<cache_dir2> ...]")
        sys.exit(1)

    for cache_dir in cache_dirs:
        migrate_cache(cache_dir)