remove crawl function

This commit is contained in:
lelo 2025-04-18 20:42:08 +00:00
parent 7c2b8e177f
commit a7080dd767

55
app.py
View File

@ -360,61 +360,6 @@ def get_transcript(subpath):
content = f.read()
return content, 200, {'Content-Type': 'text/markdown; charset=utf-8'}
@app.route("/crawl/<path:subpath>")
@auth.require_secret
def crawl_and_cache(subpath):
    """
    Walk a directory tree and cache every file found in it.

    The first segment of ``subpath`` is a key into ``session['folders']``;
    the remaining segments are joined onto that folder's base path to form
    the directory that is walked with ``os.walk``.

    For images, a thumbnail (max 1200x1200) is rendered as PNG and cached
    as ``(png_bytes, 'image/png')``.
    For non-images, the raw file bytes are cached as ``(bytes, mime)``.
    Files whose path already has a truthy cache entry are skipped, and a
    file that fails to process is logged and skipped rather than aborting
    the crawl.

    Returns:
        A ``(body, status)`` tuple. On success the body is a JSON document
        ``{"cached_files": [...]}`` listing the paths cached by this call,
        with status 200. An unknown folder key yields 404 and a subpath
        containing ``..`` yields 400, both with a JSON ``error`` body.
    """
    root, *relative_parts = subpath.split('/')

    try:
        base_path = session['folders'][root]
    except KeyError:
        # Unknown folder key: answer with a client error instead of a 500.
        return json.dumps({"error": f"Unknown folder: {root}"}, indent=4), 404

    # The subpath comes straight from the URL; refuse parent-directory
    # segments so the crawl cannot escape the configured base folder.
    if '..' in relative_parts:
        return json.dumps({"error": "Invalid path"}, indent=4), 400

    full_path = os.path.join(base_path, *relative_parts)
    cached_files = []  # Paths of the files cached during this request

    # Walk through all subdirectories and files. The loop variable is named
    # dirpath so it does not shadow the folder key ``root`` above.
    for dirpath, _dirs, files in os.walk(full_path):
        for filename in files:
            full_path_file = os.path.join(dirpath, filename)

            # Skip if this file is already in the cache
            if cache.get(full_path_file):
                continue

            # Determine the MIME type of the file itself (not of the
            # directory being crawled).
            mime, _ = mimetypes.guess_type(full_path_file)
            mime = mime or 'application/octet-stream'

            if mime.startswith('image/'):
                # Process image files: cache a downscaled PNG rendition
                try:
                    with Image.open(full_path_file) as img:
                        # Create a thumbnail (max 1200x1200), in place
                        img.thumbnail((1200, 1200))
                        img_bytes_io = io.BytesIO()
                        # No quality argument: it has no effect on PNG output
                        img.save(img_bytes_io, format='PNG')
                    # The cached bytes are PNG, so label them as PNG rather
                    # than with the source file's MIME type.
                    cache.set(full_path_file, (img_bytes_io.getvalue(), 'image/png'))
                    cached_files.append(full_path_file)
                except Exception as e:
                    # Best effort: one broken image must not stop the crawl
                    app.logger.error(f"Image processing failed for {full_path_file}: {e}")
            else:
                # Process non-image files: cache the bytes unchanged
                try:
                    with open(full_path_file, 'rb') as f:
                        file_bytes = f.read()
                    cache.set(full_path_file, (file_bytes, mime))
                    cached_files.append(full_path_file)
                except Exception as e:
                    # Best effort: one unreadable file must not stop the crawl
                    app.logger.error(f"Failed to read file {full_path_file}: {e}")

    # Return the list of cached files as a JSON response
    return json.dumps({"cached_files": cached_files}, indent=4), 200
def query_recent_connections():
global clients_connected, background_thread_running
background_thread_running = True