"""Flask app that reconciles Stripe charge exports against Raisenow exports.

Uploaded CSV/Excel files are recognised by their header prefix, stored
de-duplicated in the server-side session, and can be viewed as JSON tables
or downloaded as a multi-sheet Excel workbook.  Matching between the two
sources is strict one-to-one on (amount, name): exact same-day pairs first,
then ±1-day fuzzy pairs.
"""

import os
from io import BytesIO

import numpy as np
import pandas as pd
from flask import Flask, request, jsonify, render_template, send_file, session
from flask_session import Session

app = Flask(__name__)
# Prefer an externally supplied secret; the hard-coded fallback keeps local
# development working but MUST be overridden in production deployments.
app.secret_key = os.environ.get(
    'FLASK_SECRET_KEY',
    "gfbierpf934hftrntr45otgß45890tfh34gft45rw",
)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)

# Header prefixes used by upload() to recognise which export a file is.
# (These are data values from the exports — do not translate/rename.)
STRIPE_COLS = [
    'Type', 'ID', 'Created', 'Description', 'Amount', 'Currency',
    'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details',
]
RAISENOW_COLS = [
    'Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag',
    'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung',
    'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung',
]


def get_dataframe(key, cols):
    """Load a DataFrame from the session, or an empty one with *cols*.

    :param key: session key ('stripe_import' or 'raiseNow_import')
    :param cols: columns for the empty fallback frame
    :return: DataFrame of stored records, or an empty frame with *cols*
    """
    records = session.get(key, [])
    if records:
        return pd.DataFrame(records)
    return pd.DataFrame(columns=cols)


def _ensure_columns(df, cols):
    """Add any of *cols* missing from *df* (filled with None) and return *df*.

    Real exports carry more columns than the recognised header prefix.  When
    the session is empty, or an export lacks one of these optional columns,
    the normalisation code below would otherwise raise KeyError.
    """
    for col in cols:
        if col not in df.columns:
            df[col] = None
    return df


def _load_stripe():
    """Return Stripe 'Charge' rows with normalised matching keys (norm_*)."""
    stripe = (
        get_dataframe('stripe_import', STRIPE_COLS)
        .pipe(_ensure_columns, ['Customer Email', 'Customer Name'])
        .query("Type == 'Charge'")
        .copy()
    )
    stripe['idx_stripe'] = stripe.index
    stripe['norm_date'] = pd.to_datetime(
        stripe['Created'], format='%Y-%m-%d %H:%M'
    )
    # Stripe exports may use a comma as the decimal separator.
    stripe['norm_amount'] = (
        stripe['Amount'].astype(str).str.replace(',', '.').astype(float)
    )
    stripe['norm_email'] = stripe['Customer Email'].astype(str)
    if stripe.empty:
        # apply(axis=1) on an empty frame does not reliably yield a column.
        stripe['norm_name'] = None
    else:
        # Fall back to the transaction details when no customer name is set.
        stripe['norm_name'] = stripe.apply(
            lambda r: r['Customer Name'] or r['Details'], axis=1
        )
    return stripe


def _load_raisenow():
    """Return succeeded, non-PayPal Raisenow rows with normalised keys."""
    optional_cols = [
        'E-Mail-Adresse', 'Vorname', 'Nachname',
        'custom_parameters.altruja_action_name',
        'custom_parameters.altruja_custom1_code',
        'raisenow_parameters.product.source_url',
    ]
    raisenow = (
        get_dataframe('raiseNow_import', RAISENOW_COLS)
        .pipe(_ensure_columns, optional_cols)
        .query("Zahlungsmethode != 'paypal'")
        .query("Status == 'succeeded'")
        .copy()
    )
    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date'] = pd.to_datetime(
        raisenow['Erstellt'], format='%Y-%m-%d %H:%M'
    )
    raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
    raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
    raisenow['norm_name'] = (
        raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)
    )
    if raisenow.empty:
        raisenow['norm_zweck'] = None
    else:
        # Donation purpose: prefer the Altruja action name, then the custom
        # code, finally the first path segment of the product source URL.
        raisenow['norm_zweck'] = raisenow.apply(
            lambda r: r.get('custom_parameters.altruja_action_name')
            or r.get('custom_parameters.altruja_custom1_code'),
            axis=1,
        )
        mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
        raisenow.loc[mask, 'norm_zweck'] = (
            raisenow.loc[mask, 'raisenow_parameters.product.source_url']
            .astype(str)  # tolerate None / non-string values
            .str.extract(r'https?://[^/]+/([^/?#]+)')[0]
        )
    return raisenow


def _match_pairs(stripe, raisenow):
    """Greedily pair Stripe rows with Raisenow rows, strictly one-to-one.

    Candidates must agree on norm_amount and norm_name.  Pass 1 consumes
    exact same-day matches; pass 2 matches leftover Stripe rows within
    ±1 day, closest date winning.  Each Raisenow row is used at most once.

    :return: list of (idx_stripe, idx_raisenow) tuples
    """
    pairs = []
    remaining = raisenow.set_index('idx_raisenow')

    def candidates(row):
        # Reads the *current* `remaining` so consumed rows drop out.
        return remaining[
            (remaining['norm_amount'] == row['norm_amount'])
            & (remaining['norm_name'] == row['norm_name'])
        ]

    def day_gap(cand, row):
        # Absolute difference in whole days (times normalised to midnight).
        return (
            cand['norm_date'].dt.normalize() - row['norm_date'].normalize()
        ).abs()

    # --- pass 1: exact same-day matches ---
    for _, s in stripe.iterrows():
        cand = candidates(s)
        if cand.empty:
            continue
        exact = cand[day_gap(cand, s) == pd.Timedelta(0)]
        if not exact.empty:
            best = exact.index[0]
            pairs.append((int(s['idx_stripe']), int(best)))
            remaining = remaining.drop(best)

    # --- pass 2: ±1-day fuzzy matches on whatever is left ---
    used_stripe = {s_idx for s_idx, _ in pairs}
    leftovers = stripe[~stripe['idx_stripe'].isin(used_stripe)]
    for _, s in leftovers.iterrows():
        cand = candidates(s)
        if cand.empty:
            continue
        gaps = day_gap(cand, s)
        gaps = gaps[gaps <= pd.Timedelta(days=1)]
        if gaps.empty:
            continue
        best = gaps.idxmin()  # smallest date gap wins
        pairs.append((int(s['idx_stripe']), int(best)))
        remaining = remaining.drop(best)

    return pairs


def get_merged_df(table_name):
    """Return the DataFrame identified by *table_name*.

    Supported names:
      - 'stripe_import' / 'raiseNow_import': the normalised source tables
      - 'merged': one row per matched pair, Stripe values winning on the
        shared norm_* keys so no pandas merge suffixes ever appear
      - 'stripe_only' / 'raisenow_only': rows without a counterpart

    All-empty columns are dropped from every result.

    :raises ValueError: for any other *table_name* (including None)
    """
    stripe = _load_stripe()
    raisenow = _load_raisenow()

    if table_name == 'stripe_import':
        return stripe.dropna(axis=1, how='all')
    if table_name == 'raiseNow_import':
        return raisenow.dropna(axis=1, how='all')

    pairs = _match_pairs(stripe, raisenow)

    if table_name == 'merged':
        merged_rows = []
        for s_idx, r_idx in pairs:
            srow = stripe.loc[s_idx].to_dict()
            rrow = raisenow.loc[r_idx].to_dict()
            # Drop overlapping keys from the Raisenow side so Stripe values
            # win and no suffixes are needed; norm_zweck is preserved.
            for key in ('norm_amount', 'norm_name', 'norm_date',
                        'norm_email', 'idx_stripe'):
                rrow.pop(key, None)
            merged_rows.append({**srow, **rrow})
        result = pd.DataFrame(merged_rows)
    elif table_name == 'stripe_only':
        matched = {s_idx for s_idx, _ in pairs}
        result = stripe[~stripe['idx_stripe'].isin(matched)]
    elif table_name == 'raisenow_only':
        matched = {r_idx for _, r_idx in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(matched)]
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    return result.dropna(axis=1, how='all')


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Ingest uploaded CSV/Excel exports into the session, de-duplicated.

    Files are classified by header prefix (STRIPE_COLS / RAISENOW_COLS);
    unrecognised files are silently skipped.  Returns 400 when no files
    were sent, otherwise ``{"status": "ok"}``.
    """
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400

    for f in files:
        raw = (
            pd.read_csv(f)
            if f.filename.lower().endswith('.csv')
            else pd.read_excel(f)
        )
        raw = raw.dropna(how='all').dropna(axis=1, how='all')
        # NaN is neither JSON- nor session-friendly; store None instead.
        raw = raw.astype(object).replace({np.nan: None})

        cols = list(raw.columns)
        if cols[:len(STRIPE_COLS)] == STRIPE_COLS:
            key, dedupe_col = 'stripe_import', 'ID'
        elif cols[:len(RAISENOW_COLS)] == RAISENOW_COLS:
            key, dedupe_col = 'raiseNow_import', 'Identifikationsnummer'
        else:
            continue  # unknown export format

        existing = get_dataframe(key, [])
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(
            subset=[dedupe_col], keep='first'
        ).reset_index(drop=True)
        session[key] = (
            deduped.astype(object)
            .where(pd.notnull(deduped), None)
            .to_dict(orient='records')
        )

    return jsonify({'status': 'ok'})


@app.route('/get_table')
def get_table():
    """Return the table named by ``?table=`` as JSON columns + records."""
    table = request.args.get('table')
    try:
        df = get_merged_df(table)
    except ValueError as exc:
        # Unknown or missing ?table= is a client error, not a 500.
        return jsonify({'error': str(exc)}), 400
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records'),
    })


@app.route('/download')
def download():
    """Stream all five tables as one formatted Excel workbook."""
    sheet_names = [
        'stripe_import', 'raiseNow_import', 'merged',
        'stripe_only', 'raisenow_only',
    ]
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name in sheet_names:
            df = get_merged_df(name)
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]
            worksheet.freeze_panes(1, 0)  # keep the header row visible
            if df.shape[1]:
                # Autofilter over the header plus all data rows; guarded so
                # a zero-column sheet cannot pass a -1 column index.
                worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
            if not df.empty:
                # Size columns roughly to the first data row's content;
                # guarded so empty sheets no longer crash on iloc[0].
                for col_idx, cell_value in enumerate(df.iloc[0].astype(str)):
                    worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype=(
            'application/vnd.openxmlformats-officedocument'
            '.spreadsheetml.sheet'
        ),
    )


if __name__ == '__main__':
    app.run(debug=True)