Compare commits

..

2 Commits

Author SHA1 Message Date
a7080dd767 remove crawl function 2025-04-18 20:42:08 +00:00
7c2b8e177f access directly from disk 2025-04-18 20:39:30 +00:00
2 changed files with 144 additions and 117 deletions

198
app.py
View File

@ -236,96 +236,115 @@ def api_browse(subpath):
@app.route("/media/<path:subpath>") @app.route("/media/<path:subpath>")
@auth.require_secret @auth.require_secret
def serve_file(subpath): def serve_file(subpath):
# ─── 1) Locate the real file on disk ───
root, *relative_parts = subpath.split('/') root, *relative_parts = subpath.split('/')
base_path = session['folders'][root] base_path = session['folders'].get(root)
full_path = os.path.join(base_path, *relative_parts) full_path = os.path.join(base_path or '', *relative_parts)
if not os.path.isfile(full_path): if not os.path.isfile(full_path):
app.logger.error(f"File not found: {full_path}") app.logger.error(f"File not found: {full_path}")
return "File not found", 404 return "File not found", 404
# ─── 2) Prep request info ───
mime, _ = mimetypes.guess_type(full_path) mime, _ = mimetypes.guess_type(full_path)
mime = mime or 'application/octet-stream' mime = mime or 'application/octet-stream'
range_header = request.headers.get('Range')
ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
is_cache_request = request.headers.get('X-Cache-Request') == 'true'
# Check cache first (using diskcache) is_cache_request = request.headers.get('X-Cache-Request') == 'true'
response = None ip_address = request.remote_addr
user_agent = request.headers.get('User-Agent')
# determine the cache to use based on the file type
if mime and mime.startswith('audio/'): # skip logging on cache hits or on audio GETs (per your rules)
cache = cache_audio do_log = not is_cache_request
elif mime and mime.startswith('image/'): if mime == 'audio/mpeg' and request.method != 'HEAD':
cache = cache_image do_log = False
elif mime and mime.startswith('video/'):
cache = cache_video # ─── 3) Pick the right cache ───
else: if mime.startswith('audio/'): cache = cache_audio
cache = cache_other elif mime.startswith('image/'): cache = cache_image
elif mime.startswith('video/'): cache = cache_video
# Check if the file is already cached else: cache = cache_other
if is_cache_request:
logging = False # ─── 4) Try to stream directly from diskcache ───
else: try:
logging = True # returns a file-like whose .name is the real path on disk
with cache.read(subpath) as reader:
cached = cache.get(subpath) file_path = reader.name
if cached: filesize = os.path.getsize(file_path)
cached_file_bytes, mime = cached response = send_file(
cached_file = io.BytesIO(cached_file_bytes) file_path,
filesize = len(cached_file.getbuffer()) mimetype= mime,
response = send_file(cached_file, mimetype=mime) conditional= True # enable Range / If-Modified / etc
else: )
if mime and mime.startswith('image/'): cached_hit = True
# Image processing branch (with caching)
except KeyError:
# cache miss → generate & write back to cache
cached_hit = False
if mime.startswith('image/'):
# ─── 4a) Image branch: thumbnail & cache the JPEG ───
try: try:
with Image.open(full_path) as img: with Image.open(full_path) as img:
img.thumbnail((1920, 1920)) img.thumbnail((1920, 1920))
if img.mode in ("RGBA", "P"): if img.mode in ("RGBA", "P"):
img = img.convert("RGB") img = img.convert("RGB")
output_format = 'JPEG'
output_mime = 'image/jpeg'
save_kwargs = {'quality': 85}
img_bytes_io = io.BytesIO() thumb_io = io.BytesIO()
img.save(img_bytes_io, format=output_format, **save_kwargs) img.save(thumb_io, format='JPEG', quality=85)
thumb_bytes = img_bytes_io.getvalue() thumb_io.seek(0)
filesize = len(thumb_bytes)
cache.set(subpath, (thumb_bytes, output_mime)) # write thumbnail into diskcache as a real file
response = send_file(io.BytesIO(thumb_bytes), mimetype=output_mime, conditional=True) cache.set(subpath, thumb_io, read=True)
# now re-open from cache to get the on-disk path
with cache.read(subpath) as reader:
file_path = reader.name
filesize = os.path.getsize(file_path)
response = send_file(
file_path,
mimetype= 'image/jpeg',
conditional= True
)
except Exception as e: except Exception as e:
app.logger.error(f"Image processing failed for {subpath}: {e}") app.logger.error(f"Image processing failed for {subpath}: {e}")
abort(500) abort(500)
else: else:
# Cache non-image files: read bytes and cache # ─── 4b) Non-image branch: cache original file ───
try: try:
with open(full_path, 'rb') as f: # store the real file on diskcache
file_bytes = f.read() cache.set(subpath, open(full_path, 'rb'), read=True)
cache.set(subpath, (file_bytes, mime))
file_bytes_io = io.BytesIO(file_bytes) # read back to get its path
filesize = len(file_bytes_io.getbuffer()) with cache.read(subpath) as reader:
response = send_file(file_bytes_io, mimetype=mime, conditional=True) file_path = reader.name
filesize = os.path.getsize(file_path)
response = send_file(
file_path,
mimetype= mime,
conditional= True
)
except Exception as e: except Exception as e:
app.logger.error(f"Failed to read file {subpath}: {e}") app.logger.error(f"Failed to cache file {subpath}: {e}")
abort(500) abort(500)
# Set Cache-Control header (browser caching for 1 day) # ─── 5) Common headers & logging ───
response.headers['Cache-Control'] = 'public, max-age=86400' response.headers['Cache-Control'] = 'public, max-age=86400'
# special rules for audio files. if do_log:
# HEAD request checks if the audio file is available. GET requests coming from the audi player itself and can be made multiple times. a.log_file_access(
# a HEAD request only for logging will be ignored because of rules before subpath, filesize, mime,
if mime and mime.startswith('audio/mpeg') and request.method != 'HEAD': ip_address, user_agent,
logging = False session['device_id'], cached_hit
)
if logging:
a.log_file_access(subpath, filesize, mime, ip_address, user_agent, session['device_id'], bool(cached))
return response return response
@app.route("/transcript/<path:subpath>") @app.route("/transcript/<path:subpath>")
@auth.require_secret @auth.require_secret
def get_transcript(subpath): def get_transcript(subpath):
@ -341,61 +360,6 @@ def get_transcript(subpath):
content = f.read() content = f.read()
return content, 200, {'Content-Type': 'text/markdown; charset=utf-8'} return content, 200, {'Content-Type': 'text/markdown; charset=utf-8'}
@app.route("/crawl/<path:subpath>")
@auth.require_secret
def crawl_and_cache(subpath):
    """
    Walk a directory tree and cache every file found in it.

    ``subpath`` has the form ``<root>/<relative/dir>``; ``<root>`` is looked
    up in ``session['folders']`` to find the base directory on disk.

    Images are reduced to a thumbnail (max 1200x1200), re-encoded as PNG and
    cached as ``(png_bytes, 'image/png')``. Every other file is cached as
    ``(file_bytes, mime)``. Entries are keyed by the file's full path on
    disk. Files that are already cached are skipped; files that cannot be
    processed are logged and skipped.

    Returns:
        A JSON document ``{"cached_files": [...]}`` listing the paths cached
        by this call with status 200, or a 404 if ``<root>`` is not a known
        folder.
    """
    # NOTE(review): `cache` is not defined in the visible part of this module,
    # and serve_file reads from cache_audio/cache_image/cache_video/cache_other
    # keyed by the URL subpath -- confirm that entries written here (keyed by
    # filesystem path) are actually read back somewhere.
    root, *relative_parts = subpath.split('/')
    try:
        base_path = session['folders'][root]
    except KeyError:
        app.logger.error(f"Unknown folder root: {root}")
        return "Folder not found", 404
    start_dir = os.path.join(base_path, *relative_parts)

    cached_files = []  # Paths cached by this call

    # `dirpath` (not `root`) so the folder key parsed above is not shadowed.
    for dirpath, _dirnames, filenames in os.walk(start_dir):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)

            # Membership test avoids loading the cached payload just to
            # find out whether it exists.
            if file_path in cache:
                continue

            # Guess the type from the file itself, not from the directory
            # being crawled.
            mime, _ = mimetypes.guess_type(file_path)
            mime = mime or 'application/octet-stream'

            if mime.startswith('image/'):
                try:
                    with Image.open(file_path) as img:
                        img.thumbnail((1200, 1200))
                        png_io = io.BytesIO()
                        img.save(png_io, format='PNG')
                    # The payload is PNG regardless of the source format, so
                    # store the matching MIME type alongside it.
                    cache.set(file_path, (png_io.getvalue(), 'image/png'))
                    cached_files.append(file_path)
                except Exception as e:
                    app.logger.error(f"Image processing failed for {file_path}: {e}")
            else:
                try:
                    with open(file_path, 'rb') as f:
                        file_bytes = f.read()
                    cache.set(file_path, (file_bytes, mime))
                    cached_files.append(file_path)
                except Exception as e:
                    app.logger.error(f"Failed to read file {file_path}: {e}")

    return json.dumps({"cached_files": cached_files}, indent=4), 200
def query_recent_connections(): def query_recent_connections():
global clients_connected, background_thread_running global clients_connected, background_thread_running
background_thread_running = True background_thread_running = True

63
migrate_cache.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
migrate_cache.py
Migrate DiskCache caches from old in-memory entries (bytes, mime) into
new on-disk read=True entries so that Flask can send them via send_file(path).
Usage:
python migrate_cache.py /path/to/filecache_audio \
/path/to/filecache_image \
/path/to/filecache_video \
/path/to/filecache_other
"""
import io
import os
import sys

from diskcache import Cache
def migrate_cache(cache_path):
    """
    Migrate one DiskCache directory from old-style to file-backed entries.

    Walks every key in the cache at `cache_path`. If the value is a tuple of
    (bytes, mime), the bytes are rewritten with read=True so DiskCache stores
    them as a file on disk. The mime string is not kept in the new entry.
    Values of any other shape are left untouched.

    Args:
        cache_path: Directory of an existing DiskCache cache.

    Returns:
        A (migrated, skipped, failed) tuple of entry counts. If `cache_path`
        is not an existing directory, nothing is created and (0, 0, 0) is
        returned.
    """
    print(f"➡ Migrating cache at {cache_path!r}")

    if not os.path.isdir(cache_path):
        # Opening a Cache on a missing path would create a new, empty cache
        # there, which hides a mistyped argument.
        print(f" [ERROR] not a directory: {cache_path!r}\n")
        return 0, 0, 0

    migrated = 0
    skipped = 0
    failed = 0

    # The context manager closes the cache even if a rewrite raises.
    with Cache(cache_path) as cache:
        # Iterate keys without loading everything into memory
        for key in cache.iterkeys():
            try:
                val = cache.get(key)
            except Exception as e:
                print(f" [ERROR] key={key!r} get failed: {e}")
                failed += 1
                continue

            # Detect old-style entries: (bytes, mime)
            is_old_style = (
                isinstance(val, tuple)
                and len(val) == 2
                and isinstance(val[0], (bytes, bytearray))
                and isinstance(val[1], str)
            )
            if not is_old_style:
                skipped += 1
                continue

            # Re-store the payload as an on-disk file
            cache.set(key, io.BytesIO(val[0]), read=True)
            migrated += 1

    print(f" → Done: migrated={migrated}, skipped={skipped}, failed={failed}\n")
    return migrated, skipped, failed
# Script entry point: every command-line argument is a cache directory to
# migrate, handled one after another in the order given.
if __name__ == "__main__":
    cache_dirs = sys.argv[1:]
    if not cache_dirs:
        # No directories supplied: show usage and signal failure.
        print("Usage: migrate_cache.py <cache_dir1> [<cache_dir2> ...]")
        sys.exit(1)
    for cache_dir in cache_dirs:
        migrate_cache(cache_dir)