Compare commits

...

2 Commits

Author SHA1 Message Date
a7080dd767 remove crawl function 2025-04-18 20:42:08 +00:00
7c2b8e177f access directly from disk 2025-04-18 20:39:30 +00:00
2 changed files with 144 additions and 117 deletions

198
app.py
View File

@ -236,96 +236,115 @@ def api_browse(subpath):
def _resolve_media_path(base_path, relative_parts):
    """Join *relative_parts* onto *base_path* without leaving *base_path*.

    Returns the absolute path of the requested file, or ``None`` when the
    joined path would fall outside *base_path* (for example because the
    request contained ``..`` segments).
    """
    base = os.path.abspath(base_path)
    candidate = os.path.abspath(os.path.join(base, *relative_parts))
    try:
        contained = os.path.commonpath([base, candidate]) == base
    except ValueError:
        # commonpath() refuses e.g. paths on different drives; such a path
        # cannot be inside the base folder.
        contained = False
    return candidate if contained else None


def _cached_file_path(cache, key):
    """Return the on-disk path of cache entry *key*, or ``None`` on a miss."""
    try:
        # cache.read() yields a file-like object whose .name is the real
        # path on disk; only the path is needed, so the handle is closed
        # again immediately.
        with cache.read(key) as reader:
            return reader.name
    except KeyError:
        return None


def _cache_thumbnail(cache, key, image_path):
    """Store a JPEG thumbnail (max 1920x1920) of *image_path* under *key*."""
    with Image.open(image_path) as img:
        img.thumbnail((1920, 1920))
        if img.mode in ("RGBA", "P"):
            img = img.convert("RGB")
        thumb_io = io.BytesIO()
        img.save(thumb_io, format='JPEG', quality=85)
    thumb_io.seek(0)
    # read=True makes the cache store the stream as a real file
    cache.set(key, thumb_io, read=True)


@app.route("/media/<path:subpath>")
@auth.require_secret
def serve_file(subpath):
    """Serve a media file through the per-type disk cache.

    The first path segment of *subpath* selects a folder from
    ``session['folders']``; the remaining segments are the path inside it.
    Images are served as JPEG thumbnails, every other file is served
    unchanged. Cache misses populate the cache before responding.

    Returns the Flask response, or a 404 tuple when the folder is unknown,
    the path leaves the folder, or the file does not exist. Aborts with 500
    when the file cannot be processed or cached.
    """
    # ─── 1) Locate the real file on disk ───
    root, *relative_parts = subpath.split('/')
    base_path = session['folders'].get(root)
    # An unknown root must not fall back to the working directory, and the
    # remaining segments must not climb out of the configured folder.
    full_path = _resolve_media_path(base_path, relative_parts) if base_path else None
    if full_path is None or not os.path.isfile(full_path):
        app.logger.error(f"File not found: {full_path or subpath}")
        return "File not found", 404

    # ─── 2) Prep request info ───
    mime, _ = mimetypes.guess_type(full_path)
    mime = mime or 'application/octet-stream'
    is_image = mime.startswith('image/')
    # Cached images are always JPEG thumbnails, whatever the source format,
    # so hits and misses must both announce image/jpeg.
    response_mime = 'image/jpeg' if is_image else mime
    is_cache_request = request.headers.get('X-Cache-Request') == 'true'
    ip_address = request.remote_addr
    user_agent = request.headers.get('User-Agent')

    # Skip logging for requests flagged with X-Cache-Request and for
    # audio/mpeg requests that are not HEAD.
    do_log = not is_cache_request
    if mime == 'audio/mpeg' and request.method != 'HEAD':
        do_log = False

    # ─── 3) Pick the right cache ───
    if mime.startswith('audio/'):
        cache = cache_audio
    elif is_image:
        cache = cache_image
    elif mime.startswith('video/'):
        cache = cache_video
    else:
        cache = cache_other

    # ─── 4) Serve from diskcache, filling it on a miss ───
    file_path = _cached_file_path(cache, subpath)
    cached_hit = file_path is not None
    if not cached_hit:
        try:
            if is_image:
                _cache_thumbnail(cache, subpath, full_path)
            else:
                # Close the source handle once the cache has copied it.
                with open(full_path, 'rb') as source:
                    cache.set(subpath, source, read=True)
            # Re-open from the cache to learn the on-disk path.
            with cache.read(subpath) as reader:
                file_path = reader.name
        except Exception as e:
            if is_image:
                app.logger.error(f"Image processing failed for {subpath}: {e}")
            else:
                app.logger.error(f"Failed to cache file {subpath}: {e}")
            abort(500)

    filesize = os.path.getsize(file_path)
    response = send_file(
        file_path,
        mimetype=response_mime,
        conditional=True,  # enable Range / If-Modified / etc
    )

    # ─── 5) Common headers & logging ───
    response.headers['Cache-Control'] = 'public, max-age=86400'
    if do_log:
        a.log_file_access(
            subpath, filesize, mime,
            ip_address, user_agent,
            session['device_id'], cached_hit
        )
    return response
@app.route("/transcript/<path:subpath>")
@auth.require_secret
def get_transcript(subpath):
@ -341,61 +360,6 @@ def get_transcript(subpath):
content = f.read()
return content, 200, {'Content-Type': 'text/markdown; charset=utf-8'}
@app.route("/crawl/<path:subpath>")
@auth.require_secret
def crawl_and_cache(subpath):
    """
    Crawl a directory tree and cache every file found in it.

    The first segment of *subpath* selects a folder from
    ``session['folders']``; the rest is the directory to walk. Images are
    reduced to a thumbnail (max 1200x1200) and cached as PNG; all other
    files are cached as their raw bytes. Files already present in the cache
    are skipped, and files that fail to process are logged and skipped.

    Returns a JSON document listing the paths that were newly cached.
    """
    # NOTE(review): `cache` is not defined in the visible part of this file,
    # and entries are keyed by absolute file path whereas serve_file keys by
    # the request subpath — confirm both against the rest of the module.
    root, *relative_parts = subpath.split('/')
    base_path = session['folders'][root]
    full_path = os.path.join(base_path, *relative_parts)
    cached_files = []  # paths cached during this call

    # Distinct loop names so the folder key `root` is not shadowed.
    for dirpath, _dirnames, filenames in os.walk(full_path):
        for filename in filenames:
            full_path_file = os.path.join(dirpath, filename)

            # Skip files that are already in the cache
            if cache.get(full_path_file):
                continue

            # Guess the type from the file itself, not from the crawl root
            mime, _ = mimetypes.guess_type(full_path_file)
            mime = mime or 'application/octet-stream'

            if mime.startswith('image/'):
                try:
                    with Image.open(full_path_file) as img:
                        img.thumbnail((1200, 1200))
                        img_bytes_io = io.BytesIO()
                        img.save(img_bytes_io, format='PNG', quality=85)
                    img_bytes = img_bytes_io.getvalue()
                    # The stored bytes are PNG regardless of the source type
                    cache.set(full_path_file, (img_bytes, 'image/png'))
                    cached_files.append(full_path_file)
                except Exception as e:
                    app.logger.error(f"Image processing failed for {full_path_file}: {e}")
            else:
                try:
                    with open(full_path_file, 'rb') as f:
                        file_bytes = f.read()
                    cache.set(full_path_file, (file_bytes, mime))
                    cached_files.append(full_path_file)
                except Exception as e:
                    app.logger.error(f"Failed to read file {full_path_file}: {e}")

    # Return the list of cached files as a JSON response
    return json.dumps({"cached_files": cached_files}, indent=4), 200
def query_recent_connections():
global clients_connected, background_thread_running
background_thread_running = True

63
migrate_cache.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
migrate_cache.py
Migrate DiskCache caches from old in-memory entries (bytes, mime) into
new on-disk read=True entries so that Flask can send them via send_file(path).
Usage:
python migrate_cache.py /path/to/filecache_audio \
/path/to/filecache_image \
/path/to/filecache_video \
/path/to/filecache_other
"""
import io
import sys
from diskcache import Cache
def migrate_cache(cache_path):
    """
    Rewrite old-style entries of the cache at `cache_path` as on-disk files.

    Walks every key in the cache. A value that is a 2-tuple of
    (bytes-like, str) is treated as an old-style (data, mime) entry and is
    stored again under the same key with read=True, keeping only the data.
    Every other value is left untouched and counted as skipped. Keys whose
    value cannot be loaded are reported and not counted.

    Prints a summary line with the migrated and skipped counts.
    """
    print(f"➡ Migrating cache at {cache_path!r}")
    cache = Cache(cache_path)
    migrated = 0
    skipped = 0
    try:
        # Iterate keys without loading everything into memory
        for key in cache.iterkeys():
            try:
                val = cache.get(key)
            except Exception as e:
                print(f" [ERROR] key={key!r} get failed: {e}")
                continue

            # Detect old-style entries: (bytes, mime)
            is_old_style = (
                isinstance(val, tuple)
                and len(val) == 2
                and isinstance(val[0], (bytes, bytearray))
                and isinstance(val[1], str)
            )
            if not is_old_style:
                skipped += 1
                continue

            # Re-store as an on-disk file; the mime string is dropped.
            cache.set(key, io.BytesIO(val[0]), read=True)
            migrated += 1
    finally:
        # Release the cache even when a rewrite fails part-way through.
        cache.close()
    print(f" → Done: migrated={migrated}, skipped={skipped}\n")
if __name__ == "__main__":
    # Every command-line argument is a cache directory to migrate.
    cache_dirs = sys.argv[1:]
    if not cache_dirs:
        print("Usage: migrate_cache.py <cache_dir1> [<cache_dir2> ...]")
        sys.exit(1)
    for cache_dir in cache_dirs:
        migrate_cache(cache_dir)