commit 9c25c70a0f8a2bb3bc37c15da28142c6a26a62e0
Author: lelo
Date:   Wed May 21 16:44:43 2025 +0000

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..714cf83
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+app/__pycache__/
+.env
diff --git a/app/app.py b/app/app.py
new file mode 100644
index 0000000..92ee966
--- /dev/null
+++ b/app/app.py
@@ -0,0 +1,205 @@
+from flask import Flask, request, jsonify, render_template, send_file, session
+import pandas as pd
+import numpy as np
+from io import BytesIO
+from flask_session import Session
+
+app = Flask(__name__)
+app.secret_key = "your-secret-key"  # replace with a secure random key
+# Configure server-side session (filesystem) to avoid size limits in cookies
+app.config['SESSION_TYPE'] = 'filesystem'
+app.config['SESSION_FILE_DIR'] = './.flask_session/'
+Session(app)
+
+STRIPE_COLS = [
+    'Type', 'ID', 'Created', 'Description', 'Amount', 'Currency',
+    'Converted Amount', 'Fees', 'Net', 'Converted Currency',
+    'Customer Name', 'Customer Email', 'Details'
+]
+RAISENOW_COLS = [
+    'Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status',
+    'Betrag', 'Währung', 'Übernommene Gebühren - Betrag',
+    'Übernommene Gebühren - Währung', 'Zahlungsmethode',
+    'Zahlungsanbieter', 'Vorname', 'Nachname', 'E-Mail-Adresse',
+    'custom_parameters.altruja_action_name', 'custom_parameters.altruja_custom1_code'
+]
+
+
+def get_dataframe(key, cols):
+    """
+    Load a DataFrame from the session or create an empty one with the given columns.
+    """
+    records = session.get(key, [])
+    if records:
+        df = pd.DataFrame(records)
+    else:
+        df = pd.DataFrame(columns=cols)
+    return df
+
+
+def get_merged_df(table_name):
+    """
+    Return a DataFrame for the given table_name based on the Stripe and RaiseNow imports,
+    including a secondary merge with a date tolerance of ±1 day.
+    """
+    stripe_df = get_dataframe('stripe_import', STRIPE_COLS)
+    raisenow_df = get_dataframe('raiseNow_import', RAISENOW_COLS)
+
+    # Normalize Stripe
+    stripe_df = stripe_df.query("Type == 'Charge'")
+    stripe_df['norm_date'] = pd.to_datetime(stripe_df['Created'], format='%Y-%m-%d %H:%M')
+    stripe_df['norm_amount'] = stripe_df['Amount'].astype(str).str.replace(',', '.')
+    stripe_df['norm_amount'] = stripe_df['norm_amount'].astype(float)
+    stripe_df['norm_email'] = stripe_df['Customer Email'].astype(str)
+    stripe_df['norm_name'] = stripe_df.apply(
+        lambda x: x['Customer Name'] if x.get('Customer Name') else x['Details'],
+        axis=1
+    )
+
+    # Normalize RaiseNow
+    raisenow_df = raisenow_df.query("Zahlungsmethode != 'paypal'")
+    raisenow_df = raisenow_df.query("Status == 'succeeded'")
+    raisenow_df['norm_date'] = pd.to_datetime(raisenow_df['Erstellt'], format='%Y-%m-%d %H:%M')
+    raisenow_df['norm_amount'] = raisenow_df['Betrag'].astype(float)
+    raisenow_df['norm_name'] = (
+        raisenow_df['Vorname'].astype(str) + ' ' + raisenow_df['Nachname'].astype(str)
+    )
+    raisenow_df['norm_email'] = raisenow_df['E-Mail-Adresse'].astype(str)
+    raisenow_df['norm_zweck'] = raisenow_df.apply(
+        lambda x: x['custom_parameters.altruja_action_name']
+        if x.get('custom_parameters.altruja_action_name')
+        else x.get('custom_parameters.altruja_custom1_code'),
+        axis=1
+    )
+
+    if table_name in ('stripe_import', 'raiseNow_import'):
+        df = stripe_df if table_name == 'stripe_import' else raisenow_df
+        return df.dropna(axis=1, how='all')
+
+    # Outer merge on amount and name; the indicator column marks rows that
+    # only exist on one side
+    exact = pd.merge(
+        stripe_df,
+        raisenow_df,
+        on=['norm_amount', 'norm_name'],
+        how='outer',
+        suffixes=('_stripe', '_raisenow'),
+        indicator=True
+    )
+    # Compare calendar days so the time of day does not matter
+    exact['date_diff'] = (
+        exact['norm_date_stripe'].dt.normalize() - exact['norm_date_raisenow'].dt.normalize()
+    ).abs()
+
+    # Exact matches: same amount, name and calendar day
+    exact_matches = exact[(exact['_merge'] == 'both') & (exact['date_diff'] == pd.Timedelta(0))].copy()
+    # Tolerant matches: same amount and name, booked up to one day apart
+    fuzzy_matches = exact[
+        (exact['_merge'] == 'both')
+        & (exact['date_diff'] > pd.Timedelta(0))
+        & (exact['date_diff'] <= pd.Timedelta(days=1))
+    ].copy()
+
+    # Combine exact and tolerant matches
+    combined = pd.concat([exact_matches, fuzzy_matches], ignore_index=True)
+    combined = combined.drop(columns=['_merge', 'date_diff'], errors='ignore')
+
+    # Determine outputs
+    if table_name == 'merged':
+        result = combined
+    elif table_name == 'stripe_only':
+        # Stripe charges that were not matched to any RaiseNow transaction
+        matched_stripe_ids = combined['ID'].dropna() if 'ID' in combined.columns else []
+        result = stripe_df[~stripe_df['ID'].isin(matched_stripe_ids)]
+    elif table_name == 'raisenow_only':
+        # RaiseNow transactions that were not matched to any Stripe charge
+        matched_raisenow_ids = (
+            combined['Identifikationsnummer'].dropna()
+            if 'Identifikationsnummer' in combined.columns else []
+        )
+        result = raisenow_df[~raisenow_df['Identifikationsnummer'].isin(matched_raisenow_ids)]
+    else:
+        raise ValueError(f"Unknown table_name '{table_name}'")
+
+    return result.dropna(axis=1, how='all')
+
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+
+@app.route('/upload', methods=['POST'])
+def upload():
+    files = request.files.getlist('files')
+    if not files:
+        return jsonify({'error': 'No files uploaded'}), 400
+
+    for f in files:
+        raw = (
+            pd.read_csv(f) if f.filename.lower().endswith('.csv') else pd.read_excel(f)
+        )
+        raw = raw.dropna(how='all').dropna(axis=1, how='all')
+        raw = raw.astype(object).replace({np.nan: None})
+        cols = list(raw.columns)
+
+        if cols[:len(STRIPE_COLS)] == STRIPE_COLS:
+            key = 'stripe_import'
+            dedupe_col = 'ID'
+        elif cols[:len(RAISENOW_COLS)] == RAISENOW_COLS:
+            key = 'raiseNow_import'
+            dedupe_col = 'Identifikationsnummer'
+        else:
+            continue
+
+        existing = get_dataframe(key, [])
+        combined = pd.concat([existing, raw], ignore_index=True)
+        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)
+
+        # Save back to session
+        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')
+
+    return jsonify({'status': 'ok'})
+
+
+@app.route('/get_table')
+def get_table():
+    table = request.args.get('table')
+    try:
+        df = get_merged_df(table)
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+    df = df.astype(object).where(pd.notnull(df), None)
+    return jsonify({
+        'columns': list(df.columns),
+        'data': df.to_dict(orient='records')
+    })
+
+
+@app.route('/download')
+def download():
+    sheets = {
+        name: get_merged_df(name)
+        for name in ['stripe_import', 'raiseNow_import', 'merged', 'stripe_only', 'raisenow_only']
+    }
+
+    output = BytesIO()
+    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
+        for name, df in sheets.items():
+            df.to_excel(writer, sheet_name=name, index=False)
+
+    output.seek(0)
+    return send_file(
+        output,
+        as_attachment=True,
+        download_name='all_tables.xlsx',
+        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+    )
+
+if __name__ == '__main__':
+    app.run(debug=True)
diff --git a/app/requirements.txt b/app/requirements.txt
new file mode 100644
index 0000000..0959418
--- /dev/null
+++ b/app/requirements.txt
@@ -0,0 +1,6 @@
+Flask
+Flask-Session
+pandas
+numpy
+openpyxl
+XlsxWriter
\ No newline at end of file
diff --git a/app/templates/index.html b/app/templates/index.html
new file mode 100644
index 0000000..d82aa4c
--- /dev/null
+++ b/app/templates/index.html
@@ -0,0 +1,110 @@
[The 110 added template lines are garbled in this copy of the diff; only the page title "Multi‐Table Excel Import" and the page heading "Excel Importer" are recoverable.]
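The matching strategy in get_merged_df above comes down to a single outer merge on the normalized amount and name, followed by a calendar-day comparison. A minimal, self-contained sketch of that idea (names, amounts and dates below are invented purely for illustration):

import pandas as pd

# Toy data: one exact match, one match a calendar day apart, one unmatched charge
stripe = pd.DataFrame({
    'norm_amount': [50.0, 20.0, 10.0],
    'norm_name': ['Anna Muster', 'Ben Beispiel', 'Cara Test'],
    'norm_date': pd.to_datetime(['2025-05-01 10:00', '2025-05-02 23:50', '2025-05-03 09:00']),
})
raisenow = pd.DataFrame({
    'norm_amount': [50.0, 20.0],
    'norm_name': ['Anna Muster', 'Ben Beispiel'],
    'norm_date': pd.to_datetime(['2025-05-01 09:58', '2025-05-03 00:05']),
})

merged = pd.merge(stripe, raisenow, on=['norm_amount', 'norm_name'],
                  how='outer', suffixes=('_stripe', '_raisenow'), indicator=True)
# Compare calendar days, ignoring the time of day
merged['date_diff'] = (merged['norm_date_stripe'].dt.normalize()
                       - merged['norm_date_raisenow'].dt.normalize()).abs()

matches = merged[(merged['_merge'] == 'both') & (merged['date_diff'] <= pd.Timedelta(days=1))]
stripe_only = merged[merged['_merge'] == 'left_only']

print(matches[['norm_name', 'norm_amount', 'date_diff']])
print(stripe_only[['norm_name', 'norm_amount']])

Normalizing the timestamps to midnight turns the tolerance into a pure calendar-day check, so a donation booked at 23:50 still matches a charge recorded a few minutes after midnight.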
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..ac0aa16
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,39 @@
+networks:
+  traefik:
+    external: true
+
+services:
+  app:
+    image: python:3.11-slim
+    container_name: flask_app
+    environment:
+      - FLASK_APP=app.py
+      - FLASK_ENV=production
+    restart: always
+    networks:
+      traefik:
+    volumes:
+      - ./app:/usr/src/app
+    working_dir: /usr/src/app
+    command: >
+      sh -c "
+      pip install --no-cache-dir -r requirements.txt &&
+      flask run --host=0.0.0.0 --port=5000"
+    labels:
+      - "traefik.enable=true"
+
+      # HTTP router (port 80), redirecting to HTTPS
+      - "traefik.http.routers.${CONTAINER_NAME}.rule=${HOST_RULE}"
+      - "traefik.http.routers.${CONTAINER_NAME}.entrypoints=web"
+      - "traefik.http.routers.${CONTAINER_NAME}.middlewares=redirect-to-https"
+      - "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https"
+
+      # HTTPS router (TLS via Let's Encrypt)
+      - "traefik.http.routers.${CONTAINER_NAME}-secure.rule=${HOST_RULE}"
+      - "traefik.http.routers.${CONTAINER_NAME}-secure.entrypoints=websecure"
+      - "traefik.http.routers.${CONTAINER_NAME}-secure.tls=true"
+      - "traefik.http.routers.${CONTAINER_NAME}-secure.tls.certresolver=myresolver"
+
+      # Internal port mapping (the Flask dev server listens on port 5000)
+      - "traefik.http.services.${CONTAINER_NAME}.loadbalancer.server.port=5000"
+
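For a quick end-to-end check of the routes in app.py, a small client along these lines should work once the app is running; the base URL and file names are assumptions, while the endpoints, the form field name 'files' and the table names come from app.py above:

import requests

BASE = 'http://localhost:5000'  # assumed host/port for a local `flask run`

client = requests.Session()     # keep the Flask session cookie between requests

# Upload a Stripe and a RaiseNow export in one request (the form field is 'files')
with open('stripe_export.csv', 'rb') as s, open('raisenow_export.csv', 'rb') as r:
    print(client.post(f'{BASE}/upload', files=[('files', s), ('files', r)]).json())

# Fetch one of the computed tables as JSON
print(client.get(f'{BASE}/get_table', params={'table': 'merged'}).json())

# Download all five tables as a single Excel workbook
with open('all_tables.xlsx', 'wb') as out:
    out.write(client.get(f'{BASE}/download').content)

The uploads live in the server-side Flask session, so the same HTTP session cookie has to be reused across requests, which is what requests.Session() does here. When deploying via docker-compose.yml, the CONTAINER_NAME and HOST_RULE variables referenced in the Traefik labels still have to be supplied, for example through the git-ignored .env file that Docker Compose reads from the project directory.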