improve indexing
This commit is contained in:
parent
04bb218ac7
commit
eec9deefad
@ -128,36 +128,46 @@ def updatefileindex():
|
||||
# Prepend the foldername so it becomes part of the stored relative path.
|
||||
relative_path = os.path.join(foldername, rel_part).replace(os.sep, '/')
|
||||
filetype = os.path.splitext(entry.name)[1].lower()
|
||||
transcript = None
|
||||
|
||||
# Check for a corresponding transcript file in a sibling "Transkription" folder.
|
||||
parent_dir = os.path.dirname(entry_path)
|
||||
transcript_dir = os.path.join(parent_dir, "Transkription")
|
||||
transcript_filename = os.path.splitext(entry.name)[0] + ".md"
|
||||
transcript_path = os.path.join(transcript_dir, transcript_filename)
|
||||
if os.path.exists(transcript_path):
|
||||
try:
|
||||
with open(transcript_path, 'r', encoding='utf-8') as tf:
|
||||
transcript = tf.read()
|
||||
except Exception:
|
||||
transcript = None
|
||||
|
||||
|
||||
# Retrieve the hit count for this file.
|
||||
hit_count = get_hit_count(relative_path)
|
||||
|
||||
category, titel, name, performance_date, site = None, None, None, None, None
|
||||
|
||||
# Determine the site
|
||||
if foldername == 'Gottesdienste Speyer':
|
||||
site = 'Speyer'
|
||||
elif foldername == 'Gottesdienste Schwegenheim':
|
||||
site = 'Schwegenheim'
|
||||
|
||||
if filetype == '.mp3':
|
||||
# Determine the site
|
||||
if foldername == 'Gottesdienste Speyer':
|
||||
site = 'Speyer'
|
||||
elif foldername == 'Gottesdienste Schwegenheim':
|
||||
site = 'Schwegenheim'
|
||||
transcript = None
|
||||
|
||||
# Check for a corresponding transcript file in a sibling "Transkription" folder.
|
||||
parent_dir = os.path.dirname(entry_path)
|
||||
transcript_dir = os.path.join(parent_dir, "Transkription")
|
||||
transcript_filename = os.path.splitext(entry.name)[0] + ".md"
|
||||
transcript_path = os.path.join(transcript_dir, transcript_filename)
|
||||
if os.path.exists(transcript_path):
|
||||
try:
|
||||
with open(transcript_path, 'r', encoding='utf-8') as tf:
|
||||
transcript = tf.read()
|
||||
except Exception:
|
||||
transcript = None
|
||||
|
||||
# extract category and titel from filename
|
||||
filename_ext = os.path.splitext(entry.name)[0]
|
||||
left_side, right_side = filename_ext.split('-', 1) if '-' in filename_ext else (filename_ext, None)
|
||||
if 'predigt' in left_side.lower():
|
||||
try:
|
||||
int(left_side.strip())
|
||||
# first part is only a number
|
||||
previous_right_side = right_side
|
||||
left_side, right_side = previous_right_side.split('-', 1) if '-' in previous_right_side else (previous_right_side, None)
|
||||
except:
|
||||
# first part not a number
|
||||
continue
|
||||
|
||||
if 'predig' in left_side.lower():
|
||||
category = 'Predigt'
|
||||
elif 'wort' in left_side.lower() or 'einladung' in left_side.lower():
|
||||
category = 'Vorwort'
|
||||
@ -186,22 +196,22 @@ def updatefileindex():
|
||||
titel = None
|
||||
name = None
|
||||
|
||||
# extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
|
||||
date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
|
||||
if date_match:
|
||||
date_str = date_match.group(1)
|
||||
# Normalize to DD.MM.YYYY format (four-digit year)
|
||||
# extract the date from path using regex (dd.mm.yyyy or dd.mm.yy)
|
||||
date_match = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2,4})', relative_path)
|
||||
if date_match:
|
||||
date_str = date_match.group(1)
|
||||
# Normalize to DD.MM.YYYY format (four-digit year)
|
||||
try:
|
||||
date_obj = datetime.strptime(date_str, '%d.%m.%Y')
|
||||
performance_date = date_obj.strftime('%d.%m.%Y')
|
||||
except ValueError:
|
||||
try:
|
||||
date_obj = datetime.strptime(date_str, '%d.%m.%Y')
|
||||
date_obj = datetime.strptime(date_str, '%d.%m.%y')
|
||||
performance_date = date_obj.strftime('%d.%m.%Y')
|
||||
except ValueError:
|
||||
try:
|
||||
date_obj = datetime.strptime(date_str, '%d.%m.%y')
|
||||
performance_date = date_obj.strftime('%d.%m.%Y')
|
||||
except ValueError:
|
||||
performance_date = None
|
||||
else:
|
||||
performance_date = None
|
||||
performance_date = None
|
||||
else:
|
||||
performance_date = None
|
||||
|
||||
scanned_files.append((relative_path, foldername, entry.name, filetype, category, titel, name, performance_date, site, transcript, hit_count))
|
||||
current_keys.add((relative_path, entry.name))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user