from flask import Flask, request, jsonify, render_template, send_file, session
import pandas as pd
import numpy as np
import os
from io import BytesIO
from flask_session import Session
from datetime import datetime

app = Flask(__name__)
# Read the session-signing key from the environment when available; the
# literal below is only a development fallback.
app.secret_key = os.environ.get(
    'FLASK_SECRET_KEY',
    "gfbierpf934hftrntr45otgß45890tfh34gft45rw",  # dev fallback — set FLASK_SECRET_KEY in production
)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)

# Leading column headers used by upload() to recognise which payment provider
# an uploaded CSV/Excel export came from.
STRIPE_STARTING_COLS = [
    'Type', 'ID', 'Created', 'Description', 'Amount', 'Currency',
    'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details',
]
RAISENOW_STARTING_COLS = [
    'Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag',
    'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung',
    'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung',
]


def get_dataframe(key):
    """Load a DataFrame stored as a list of records in the session.

    Returns an empty DataFrame when nothing is stored under *key*.
    """
    records = session.get(key, [])
    return pd.DataFrame(records) if records else pd.DataFrame()


def _to_float(series):
    """Parse decimal-comma formatted numbers (e.g. '1,23') into floats."""
    return series.astype(str).str.replace(',', '.').astype(float)


def _first_filled(*values):
    """Return the first value that is neither missing (None/NaN) nor ''.

    NaN-safe replacement for `a or b`: in Python NaN is truthy, so the plain
    `or` chain never falls through past a NaN value.
    """
    for v in values:
        if v is not None and pd.notna(v) and v != '':
            return v
    return None


def get_merged_df(table_name: str):
    """
    Return a DataFrame view for *table_name* built from the uploaded Stripe
    and Raisenow imports, enforcing strict one-to-one matching with:

      - exact same-day matches first
      - then ±1-day fuzzy matches
      - then unique (amount, day) singleton matches
      - no pandas merge suffixes at all
      - all original columns (including Raisenow's norm_zweck) preserved

    Supported values: 'stripe_import', 'raisenow_import', 'merged',
    'stripe_only', 'raisenow_only', 'export'.

    Raises
    ------
    ValueError
        If *table_name* is not one of the supported views.
    """
    print('calculated DataFrame')

    # --- load imports ---
    stripe_import = get_dataframe('stripe_import')
    raisenow_import = get_dataframe('raisenow_import')

    # --- return raw tables if requested; these must work even when the other
    #     provider's data has not been uploaded yet ---
    if table_name == 'stripe_import':
        return stripe_import.dropna(axis=1, how='all')
    if table_name == 'raisenow_import':
        return raisenow_import.dropna(axis=1, how='all')

    # All matching views need both imports.
    if stripe_import.empty or raisenow_import.empty:
        return pd.DataFrame()

    # --- normalize Stripe ---
    stripe_charge = stripe_import.query("Type == 'Charge'").copy()
    stripe_adjustment = stripe_import.query("Type == 'Adjustment'").copy()
    stripe_refund = stripe_import.query("Type == 'Payment Failure Refund'").copy()
    stripe_stripeFee = stripe_import.query("Type == 'Stripe Fee'").copy()

    # sum up the fees per transaction type
    total_stripe_charge_fees = _to_float(stripe_charge['Fees']).sum()
    total_stripe_adjustment_fees = _to_float(stripe_adjustment['Fees']).sum()
    total_stripe_refund_fees = _to_float(stripe_refund['Fees']).sum()
    total_stripe_stripeFee_fees = _to_float(stripe_stripeFee['Fees']).sum()

    stripe_adjustment['norm_date'] = pd.to_datetime(stripe_adjustment['Created'], format='%Y-%m-%d %H:%M')
    stripe_adjustment['norm_amount'] = _to_float(stripe_adjustment['Amount'])
    stripe_adjustment['norm_zweck'] = "Korrekturen"

    stripe_stripeFee['norm_date'] = pd.to_datetime(stripe_stripeFee['Created'], format='%Y-%m-%d %H:%M')
    stripe_stripeFee['norm_amount'] = _to_float(stripe_stripeFee['Amount'])
    stripe_stripeFee['norm_zweck'] = "Stripe"

    # Extract the "py_..." token from each refund description, then remove the
    # matching (refunded) charges so they cannot be matched to Raisenow rows.
    stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
    pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
    stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]

    stripe_charge['idx_stripe'] = stripe_charge.index
    stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M')
    stripe_charge['norm_amount'] = _to_float(stripe_charge['Amount'])
    stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str)
    # NaN-safe fallback to Details when Customer Name is missing/empty.
    stripe_charge['norm_name'] = stripe_charge.apply(
        lambda r: _first_filled(r['Customer Name'], r['Details']), axis=1
    )

    # --- normalize Raisenow ---
    raisenow = (
        raisenow_import
        .query("Zahlungsmethode != 'paypal'")
        .query("Status == 'succeeded'")
        .copy()
    )
    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
    raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
    raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
    raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)

    # two-step purpose ("Zweck") assignment, NaN-safe
    raisenow['norm_zweck'] = raisenow.apply(
        lambda r: _first_filled(
            r.get('custom_parameters.altruja_action_name'),
            r.get('custom_parameters.altruja_custom1_code'),
        ),
        axis=1,
    )
    # rows still empty/NaN fall back to the slug of the product source URL
    mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
    raisenow.loc[mask, 'norm_zweck'] = (
        raisenow.loc[mask, 'raisenow_parameters.product.source_url']
        .str.extract(r'https?://[^/]+/([^/?#]+)')[0]
    )

    # --- 1) Greedy exact same-day matches ---
    pairs = []  # list of (idx_stripe, idx_raisenow) tuples
    # index Raisenow rows for fast lookup + dropping once matched
    rr = raisenow.set_index('idx_raisenow')
    for _, s in stripe_charge.iterrows():
        # candidates share the amount and agree on name OR e-mail
        cand = rr[
            (rr['norm_amount'] == s['norm_amount'])
            & ((rr['norm_name'] == s['norm_name']) | (rr['norm_email'] == s['norm_email']))
        ].copy()
        if cand.empty:
            continue
        # absolute calendar-day difference
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        exact_cand = cand[date_diff == pd.Timedelta(0)]
        if not exact_cand.empty:
            best = exact_cand.index[0]  # first exact match wins (greedy)
            pairs.append((int(s['idx_stripe']), int(best)))
            rr = rr.drop(best)

    # --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
    used_stripe = {s for s, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = rr[
            (rr['norm_amount'] == s['norm_amount'])
            & ((rr['norm_name'] == s['norm_name']) | (rr['norm_email'] == s['norm_email']))
        ].copy()
        if cand.empty:
            continue
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        cand = cand[date_diff <= pd.Timedelta(days=1)]
        if cand.empty:
            continue
        # pick the candidate with the smallest gap inside the ±1-day window
        best = date_diff[cand.index].idxmin()
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

    # --- 3) Unique amount & exact-date-only matches ---
    # Recompute leftovers after steps 1+2
    used_stripe = {s for s, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()

    stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
    rr_df = rr.reset_index()
    rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()

    # count rows per (amount, day) on each side
    stripe_counts = (
        stripe_left
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='stripe_count')
    )
    rr_counts = (
        rr_df
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='rr_count')
    )

    # keys where exactly one row exists on BOTH sides → unambiguous pairing
    unique_keys = pd.merge(stripe_counts, rr_counts, on=['norm_amount', 'norm_date_norm'])
    unique_keys = unique_keys[
        (unique_keys['stripe_count'] == 1) & (unique_keys['rr_count'] == 1)
    ]

    for _, u in unique_keys.iterrows():
        amt = u['norm_amount']
        d = u['norm_date_norm']
        srow = stripe_left[
            (stripe_left['norm_amount'] == amt) & (stripe_left['norm_date_norm'] == d)
        ].iloc[0]
        rrow = rr_df[
            (rr_df['norm_amount'] == amt) & (rr_df['norm_date_norm'] == d)
        ].iloc[0]
        pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
        rr = rr.drop(rrow['idx_raisenow'])  # keep rr consistent with `pairs`

    # --- build the merged DataFrame ---
    merged_rows = []
    for s_idx, r_idx in pairs:
        srow = stripe_charge.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # drop overlapping keys so we never get merge suffixes; Stripe values
        # win for these, Raisenow contributes its own columns (incl. norm_zweck)
        for k in ['norm_amount', 'norm_name', 'norm_date', 'norm_email', 'idx_stripe']:
            rrow.pop(k, None)
        merged_rows.append({**srow, **rrow})

    starting_columns = ['norm_name', 'norm_date', 'norm_email', 'norm_amount', 'norm_zweck']
    combined = pd.DataFrame(merged_rows)
    if combined.empty:
        # no matches at all — keep the expected leading columns
        combined = pd.DataFrame(columns=starting_columns)
    else:
        # reorder columns to put the most important ones first
        combined = pd.concat(
            [combined[starting_columns], combined.drop(columns=starting_columns)],
            axis=1,
        )

    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s for s, _ in pairs}
        result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r for _, r in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
    elif table_name == 'export':
        used = {s for s, _ in pairs}
        stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
        result = pd.concat(
            [combined, stripe_only, stripe_adjustment, stripe_stripeFee],
            ignore_index=True,
        )
        # append the Stripe fee totals (as negative amounts) to the table
        new_rows = [
            {'norm_zweck': 'Buchungsgebühren', 'norm_amount': total_stripe_charge_fees * (-1)},
            {'norm_zweck': 'Rückbuchungsgebühren', 'norm_amount': total_stripe_refund_fees * (-1)},
            {'norm_zweck': 'Korrekturgebühren', 'norm_amount': total_stripe_adjustment_fees * (-1)},
            {'norm_zweck': 'Stripe Gebühren', 'norm_amount': total_stripe_stripeFee_fees * (-1)},
        ]
        result = pd.concat([result, pd.DataFrame(new_rows)], ignore_index=True)
        # fix empty name values from the Raisenow first/last-name columns
        for i, row in result.iterrows():
            if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
                result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()
        # fix empty email values from the Raisenow e-mail column
        for i, row in result.iterrows():
            if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
                result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    return result.dropna(axis=1, how='all')


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Accept CSV/Excel uploads, detect the provider by its leading columns,
    deduplicate against previously uploaded rows, and store in the session."""
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400

    for f in files:
        print('uploading file:', f.filename)
        raw = (
            pd.read_csv(f)
            if f.filename.lower().endswith('.csv')
            else pd.read_excel(f)
        )
        raw = raw.dropna(how='all').dropna(axis=1, how='all')
        print('number of rows:', len(raw))
        # normalise NaN → None so rows survive the JSON-like session round trip
        raw = raw.astype(object).replace({np.nan: None})

        cols = list(raw.columns)
        if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
            key = 'stripe_import'
            dedupe_col = 'ID'
        elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS:
            key = 'raisenow_import'
            dedupe_col = 'Identifikationsnummer'
        else:
            # unrecognised layout — skip this file
            continue

        existing = get_dataframe(key)
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)
        # save back to session as plain records (NaN → None again after concat)
        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')

    return jsonify({'status': 'ok'})


@app.route('/get_table')
def get_table():
    """Return the requested table view as JSON columns + records."""
    table = request.args.get('table')
    print('get_table:', table)
    df = get_merged_df(table)
    print('number of rows:', len(df))
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records')
    })


@app.route('/download')
def download():
    """Export every table view into one multi-sheet XLSX download."""
    sheets = {
        name: get_merged_df(name)
        for name in [
            'stripe_import', 'raisenow_import',
            'merged', 'stripe_only', 'raisenow_only', 'export'
        ]
    }
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name, df in sheets.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]
            # 1) freeze the header row
            worksheet.freeze_panes(1, 0)
            # 2) autofilter on the header across all columns (needs >= 1 column)
            if df.shape[1]:
                worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
            # 3) size each column from the first data row (skip empty sheets)
            if not df.empty:
                first_row = df.iloc[0].astype(str)
                for col_idx, cell_value in enumerate(first_row):
                    worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )


@app.route('/clear_session', methods=['POST'])
def clear_session():
    """Clear all session data and reset server-side stored DataFrames."""
    session.clear()
    return jsonify({'status': 'session cleared'})


def export_to_special_format(
    df: pd.DataFrame,
    reference: str,
    account: str,
    statement_number: int,
    opening_date: datetime,
    opening_balance: float,
    currency: str,
    closing_date: datetime = None,
    closing_balance: float = None
) -> str:
    """
    Convert a DataFrame of transactions into the special SWIFT-like file format.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns:
          - 'value_date' (datetime)
          - 'booking_date' (datetime)
          - 'dc' (str): 'C' for credit, 'D' for debit
          - 'amount' (float)
          - optional 'transaction_code' (str)
          - optional 'bank_reference' (str)
          - 'narrative' (str)
    reference : str
        Message reference for :20:
    account : str
        Account number for :25:
    statement_number : int
        Statement sequence for :28C: (will be zero-padded to 5 digits)
    opening_date : datetime
        Opening balance date
    opening_balance : float
        Opening balance amount (positive)
    currency : str
        Three-letter currency code (e.g. 'EUR')
    closing_date : datetime, optional
        Closing balance date
    closing_balance : float, optional
        Closing balance amount (positive)

    Returns
    -------
    str
        The formatted file content.
    """
    def fmt_amount(value: float) -> str:
        # SWIFT amounts use a decimal comma and no thousands separator
        return f"{value:.2f}".replace('.', ',')

    lines = []

    # Header
    lines.append(f":20:{reference}")
    lines.append(f":25:{account}")
    lines.append(f":28C:{statement_number:05d}")

    # Opening balance :60F:
    od = opening_date.strftime('%y%m%d')
    lines.append(f":60F:C{od}{currency}{fmt_amount(opening_balance)}")

    # Transactions (:61: statement line + :86: narrative)
    for _, row in df.iterrows():
        vd = row['value_date'].strftime('%y%m%d')
        bd = row['booking_date'].strftime('%m%d')
        dc = row['dc']
        amt = fmt_amount(row['amount'])
        tcode = row.get('transaction_code', '')
        bref = row.get('bank_reference', '')
        lines.append(f":61:{vd}{bd}{dc}{amt}{tcode}{bref}")
        lines.append(f":86:{row['narrative']}")

    # Closing balance :62F:
    if closing_date is not None and closing_balance is not None:
        cd = closing_date.strftime('%y%m%d')
        lines.append(f":62F:C{cd}{currency}{fmt_amount(closing_balance)}")

    return "\n".join(lines)


# Example usage:
# df = pd.DataFrame([...])
# content = export_to_special_format(
#     df,
#     reference='REFEXCELEXPORT',
#     account='11223344/55667788',
#     statement_number=0,
#     opening_date=datetime(2025,3,6),
#     opening_balance=0.00,
#     currency='EUR',
#     closing_date=datetime(2025,3,6),
#     closing_balance=12048.71
# )
# with open('statement.txt', 'w') as f:
#     f.write(content)


if __name__ == '__main__':
    app.run(debug=True)