partial cache

This commit is contained in:
lelo 2025-12-15 08:11:56 +00:00
parent cba6b94508
commit 7b1c97d066

120
app.py
View File

@ -481,23 +481,76 @@ def serve_file(subpath):
)
return response
# 5) Non-image branch: ensure original is cached
# 5) Non-image branch: check if cached, otherwise create partial cache file
try:
with cache.read(subpath) as reader:
file_path = reader.name
cached_hit = True
except KeyError:
cached_hit = False
try:
cache.set(subpath, open(full_path, 'rb'), read=True)
with cache.read(subpath) as reader:
file_path = reader.name
except Exception as e:
app.logger.error(f"Failed to cache file {subpath}: {e}")
abort(500)
# Create a temporary cache file that we'll write to and serve simultaneously
import hashlib
import tempfile
# Generate cache key similar to diskcache
cache_key = hashlib.md5(subpath.encode('utf-8')).hexdigest()
cache_dir = os.path.join(cache.directory, cache_key[:2])
os.makedirs(cache_dir, exist_ok=True)
cache_file_path = os.path.join(cache_dir, f"{cache_key}.tmp")
# Start copying to our cache file in chunks
def copy_to_cache_chunked():
    """Copy the source file into the temp cache file, then register it with diskcache.

    Used as the target of a background thread. Reads ``full_path`` in 1 MB
    chunks and writes each chunk to ``cache_file_path``, flushing after every
    write so that a reader streaming from the same file can pick the data up
    while the copy is still running. Once the copy is complete the file is
    stored in ``cache`` under ``subpath`` and the temp file is removed.

    Nothing is returned and nothing is raised: every failure is logged via
    ``app.logger`` because there is no caller to propagate to.
    """
    chunk_size = 1024 * 1024  # 1MB chunks

    def _remove_temp_file():
        # Best-effort cleanup. Try the removal directly instead of checking
        # os.path.exists() first, which is racy; a file that is already gone
        # is fine, any other OS error is worth a log line.
        try:
            os.remove(cache_file_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            app.logger.warning(f"Could not remove temp cache file {cache_file_path}: {e}")

    # Phase 1: chunked copy into the temp file.
    try:
        with open(full_path, 'rb') as source, open(cache_file_path, 'wb') as dest:
            while True:
                chunk = source.read(chunk_size)
                if not chunk:
                    break
                dest.write(chunk)
                # Push the chunk out of Python's buffer right away so it is
                # visible to other readers of the file.
                dest.flush()
    except Exception as e:
        app.logger.error(f"Caching failed for {subpath}: {e}")
        _remove_temp_file()
        return

    # Phase 2: hand the finished file to diskcache for proper management.
    try:
        with open(cache_file_path, 'rb') as f:
            cache.set(subpath, f, read=True)
    except Exception as e:
        app.logger.error(f"Failed to register with diskcache: {e}")
    else:
        app.logger.info(f"Finished caching {subpath}")
    finally:
        # The temp file is no longer needed whether or not registration
        # worked; leaving it behind would orphan it in the cache directory.
        _remove_temp_file()
# Start the background copy
cache_thread = threading.Thread(target=copy_to_cache_chunked, daemon=True)
cache_thread.start()
# Wait for initial data to be written
max_wait = 5.0 # Maximum 5 seconds
wait_interval = 0.05 # Check every 50ms
elapsed = 0
while elapsed < max_wait:
if os.path.exists(cache_file_path) and os.path.getsize(cache_file_path) > 0:
file_path = cache_file_path
break
time.sleep(wait_interval)
elapsed += wait_interval
else:
# Cache file not ready - abort
app.logger.error(f"Cache file not created in time for {subpath}")
abort(503, description="Service temporarily unavailable - cache initialization failed")
# 6) Build response for non-image
filesize = os.path.getsize(file_path)
filesize = os.path.getsize(full_path)
filename = os.path.basename(full_path)
if as_attachment:
@ -509,19 +562,46 @@ def serve_file(subpath):
# Single send_file call with proper attachment handling
response = send_file(
file_path,
mimetype=mimetype,
conditional=True,
as_attachment=as_attachment,
download_name=filename if as_attachment else None
)
# For partial cache files, we need to handle this differently
if not cached_hit:
# Stream from the cache file as it's being written
def generate():
    """Yield the contents of ``file_path`` while it may still be growing.

    Reads the file in chunks of up to 1 MB and yields each one until
    ``filesize`` bytes have been produced. When a read returns no data the
    generator sleeps briefly and retries, since the file may still be in the
    process of being written.

    Two safeguards keep the response well-formed:

    * Reads are capped to the number of bytes still owed, so the total
      yielded never exceeds ``filesize``.
    * If no new data shows up for ``max_stall`` seconds the generator logs
      an error and stops instead of polling forever (for example when the
      writer failed part-way through).

    Yields:
        bytes: consecutive chunks of the file, at most ``filesize`` bytes
        in total.
    """
    chunk_size = 1024 * 1024  # 1MB chunks
    poll_interval = 0.1       # seconds to wait when no data is available yet
    max_stall = 30.0          # seconds without progress before giving up

    bytes_sent = 0
    stalled = 0.0
    with open(file_path, 'rb') as f:
        while bytes_sent < filesize:
            # Never read past the number of bytes still owed to the client.
            chunk = f.read(min(chunk_size, filesize - bytes_sent))
            if chunk:
                bytes_sent += len(chunk)
                stalled = 0.0
                yield chunk
                continue

            # No data available yet: wait a bit, but not indefinitely.
            if stalled >= max_stall:
                app.logger.error(
                    f"Streaming stalled for {file_path}: "
                    f"sent {bytes_sent} of {filesize} bytes"
                )
                return
            time.sleep(poll_interval)
            stalled += poll_interval
if as_attachment:
response.headers['X-Content-Type-Options'] = 'nosniff'
response.headers['Content-Disposition'] = 'attachment'
response = make_response(generate())
response.headers['Content-Type'] = mimetype
response.headers['Content-Length'] = str(filesize)
response.headers['Accept-Ranges'] = 'bytes'
if as_attachment:
response.headers['Content-Disposition'] = f'attachment; filename="{filename}"'
response.headers['X-Content-Type-Options'] = 'nosniff'
else:
response.headers['Content-Disposition'] = 'inline'
else:
response.headers['Content-Disposition'] = 'inline'
# Cached file - use normal send_file
response = send_file(
file_path,
mimetype=mimetype,
conditional=True,
as_attachment=as_attachment,
download_name=filename if as_attachment else None
)
if as_attachment:
response.headers['X-Content-Type-Options'] = 'nosniff'
response.headers['Content-Disposition'] = 'attachment'
else:
response.headers['Content-Disposition'] = 'inline'
response.headers['Cache-Control'] = 'public, max-age=86400'