bethaus-app/apply_correction.py
2025-03-16 19:54:47 +01:00

49 lines
1.7 KiB
Python

import os
import sys
import re
import json
def apply_error_correction(text):
    """
    Replace known erroneous words or phrases in ``text`` with their corrections.

    The mapping of wrong -> right strings is read from ``error_correction.json``
    (a relative path, so it is resolved against the current working directory)
    each time the function is called. Keys are matched literally and only where
    they are delimited by regex word boundaries.

    Args:
        text: The string to correct.

    Returns:
        A tuple ``(corrected_text, count)``; ``count`` is the number of
        replacements that were made (0 when nothing matched or the mapping
        contains no usable keys).
    """
    # Load the JSON file that contains the corrections
    with open('error_correction.json', 'r', encoding='utf-8') as file:
        correction_dict = json.load(file)

    # Empty keys are dropped: an empty alternative matches the empty string at
    # every word boundary, which would inflate the count and insert text.
    # Longest keys go first because regex alternation takes the first
    # alternative that succeeds; otherwise "foo" would shadow "foo bar".
    keys = sorted((key for key in correction_dict if key), key=len, reverse=True)
    if not keys:
        # Nothing to search for; report zero corrections so callers do not
        # rewrite files needlessly.
        return text, 0

    # Combine keys into a single regex pattern
    pattern = r'\b(' + '|'.join(re.escape(key) for key in keys) + r')\b'

    def replacement_func(match):
        key = match.group(0)
        return correction_dict.get(key, key)

    # re.subn returns a tuple (new_string, number_of_subs_made)
    corrected_text, count = re.subn(pattern, replacement_func, text)
    return corrected_text, count
def process_folder(root_folder):
    """
    Walk through root_folder and apply the error corrections to .md files.

    Every file whose name ends in ".md" (case-insensitive) below
    ``root_folder`` is read as UTF-8 and passed to ``apply_error_correction``.
    A file is rewritten in place only when at least one correction was made,
    and a summary line is printed for each rewritten file.

    Args:
        root_folder: Path of the directory tree to scan.
    """
    for dirpath, _, filenames in os.walk(root_folder):
        for filename in filenames:
            if not filename.lower().endswith(".md"):
                continue
            file_path = os.path.join(dirpath, filename)

            # newline='' turns off newline translation so the file's original
            # line endings (e.g. CRLF) survive the read/write round trip and
            # only the corrected words change on disk.
            with open(file_path, 'r', encoding='utf-8', newline='') as file:
                text = file.read()

            corrected_text, error_count = apply_error_correction(text)

            # Only write to file if at least one error was corrected.
            if error_count > 0:
                with open(file_path, 'w', encoding='utf-8', newline='') as file:
                    file.write(corrected_text)
                print(f"{file_path} - {error_count} errors corrected.")
if __name__ == "__main__":
    # Exactly one positional argument is expected: the folder to scan.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python apply_correction.py <root_folder>")
        sys.exit(1)
    (root_folder,) = cli_args
    process_folder(root_folder)