import os
from datetime import datetime
from io import BytesIO

import numpy as np
import pandas as pd
from flask import Flask, request, jsonify, render_template, send_file, session
from flask_session import Session

app = Flask(__name__)
# Prefer a key from the environment; the hard-coded literal is only a
# development fallback and should never be used in production.
app.secret_key = os.environ.get(
    'SECRET_KEY',
    'gfbierpf934hftrntr45otgß45890tfh34gft45rw'
)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)

# Leading columns used to recognize which source an uploaded file came from.
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount',
                        'Currency', 'Converted Amount']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset',
                          'Status', 'Betrag', 'Währung']


def get_dataframe(key):
    """Load the DataFrame stored in the session under ``key``.

    Returns an empty DataFrame when nothing has been stored yet.
    """
    records = session.get(key, [])
    return pd.DataFrame(records) if records else pd.DataFrame()


def get_merged_df(table_name):
    """Build the DataFrame view identified by ``table_name``.

    Views
    -----
    - ``'stripe_import'`` / ``'raisenow_import'``: the raw uploads.
    - ``'merged'``: Stripe charges matched one-to-one against Raisenow rows
      (exact same-day matches first, then ±1-day fuzzy matches, then
      singleton (amount, day) pairs); no pandas merge suffixes appear and
      all original columns (including Raisenow's ``norm_zweck``) survive.
    - ``'stripe_only'`` / ``'raisenow_only'``: unmatched leftovers per side.
    - ``'export'``: merged rows + unmatched Stripe charges + adjustment and
      Stripe-fee bookings + aggregated fee rows, renamed and reduced to the
      accounting export columns.

    Raises
    ------
    ValueError
        If ``table_name`` is not one of the known views.
    """
    print('calculate DataFrame...')

    # --- raw views are served without any processing, so they work even
    # --- when only one of the two sources has been uploaded so far
    stripe_import = get_dataframe('stripe_import')
    raisenow_import = get_dataframe('raisenow_import')
    if table_name == 'stripe_import':
        return stripe_import.dropna(axis=1, how='all')
    if table_name == 'raisenow_import':
        return raisenow_import.dropna(axis=1, how='all')

    # Merged views need both sources; .query() on a column-less frame would
    # raise, so bail out early with an empty result instead.
    if stripe_import.empty or raisenow_import.empty:
        return pd.DataFrame()

    # --- load & normalize Stripe, split by transaction type ---
    stripe_charge = stripe_import.query("Type == 'Charge'").copy()
    stripe_adjustment = stripe_import.query("Type == 'Adjustment'").copy()
    stripe_refund = stripe_import.query("Type == 'Payment Failure Refund'").copy()
    stripe_stripeFee = stripe_import.query("Type == 'Stripe Fee'").copy()

    def _fee_total(frame):
        # Fee values arrive as strings with a decimal comma ("1,23").
        return frame['Fees'].astype(str).str.replace(',', '.').astype(float).sum()

    total_stripe_charge_fees = _fee_total(stripe_charge)
    total_stripe_adjustment_fees = _fee_total(stripe_adjustment)
    total_stripe_refund_fees = _fee_total(stripe_refund)
    total_stripe_stripeFee_fees = _fee_total(stripe_stripeFee)

    def _normalize_internal(frame, zweck, name):
        # Internal Stripe bookings get fixed purpose/name labels.
        frame['norm_date'] = pd.to_datetime(frame['Created'], format='%Y-%m-%d %H:%M')
        frame['norm_amount'] = frame['Amount'].astype(str).str.replace(',', '.').astype(float)
        frame['norm_zweck'] = zweck
        frame['norm_name'] = name
        frame['norm_currency'] = frame['Currency'].astype(str).str.upper()

    _normalize_internal(stripe_adjustment, "Korrekturen", "Verrechnung Korrekturen")
    _normalize_internal(stripe_stripeFee, "Stripe", "Verrechnung Stripe")

    # Charges later refunded ("Payment Failure Refund" references a "py_..."
    # payment id in its description) are excluded from matching entirely.
    stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
    pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
    stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]

    stripe_charge['idx_stripe'] = stripe_charge.index
    stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M')
    stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float)
    stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str)
    stripe_charge['norm_currency'] = stripe_charge['Currency'].astype(str).str.upper()
    stripe_charge['norm_name'] = stripe_charge.apply(
        lambda r: r['Customer Name'] or r['Details'], axis=1
    )

    # --- load & normalize Raisenow ---
    raisenow = (
        raisenow_import
        .query("Zahlungsmethode != 'paypal'")
        .query("Status == 'succeeded'")
        .copy()
    )
    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
    raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
    raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
    raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)
    raisenow['norm_currency'] = raisenow['Währung'].astype(str).str.upper()
    # Two-step purpose assignment: action name first, then the custom code.
    raisenow['norm_zweck'] = raisenow.apply(
        lambda r: r.get('custom_parameters.altruja_action_name')
        or r.get('custom_parameters.altruja_custom1_code'),
        axis=1
    )
    # Fall back to the source URL's first path segment where both are empty.
    mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
    if mask.any() and 'raisenow_parameters.product.source_url' in raisenow.columns:
        raisenow.loc[mask, 'norm_zweck'] = (
            raisenow.loc[mask, 'raisenow_parameters.product.source_url']
            .str.extract(r'https?://[^/]+/([^/?#]+)')[0]
        )

    def _candidates(pool, s):
        # Candidate Raisenow rows: same amount AND (same name OR same e-mail).
        return pool[
            (pool['norm_amount'] == s['norm_amount'])
            & ((pool['norm_name'] == s['norm_name'])
               | (pool['norm_email'] == s['norm_email']))
        ].copy()

    pairs = []
    # Index Raisenow rows for fast lookup + dropping once matched.
    rr = raisenow.set_index('idx_raisenow')

    # --- 1) greedy exact same-day matches ---
    for _, s in stripe_charge.iterrows():
        cand = _candidates(rr, s)
        if cand.empty:
            continue
        # Absolute difference in whole days.
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        exact_cand = cand[date_diff == pd.Timedelta(0)]
        if not exact_cand.empty:
            best = exact_cand.index[0]
            pairs.append((int(s['idx_stripe']), int(best)))
            rr = rr.drop(best)

    # --- 2) greedy fuzzy ±1-day matches on the remaining rows ---
    used_stripe = {s_idx for s_idx, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = _candidates(rr, s)
        if cand.empty:
            continue
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        if cand[date_diff <= pd.Timedelta(days=1)].empty:
            continue
        # Smallest gap wins; it is guaranteed <= 1 day because at least one
        # candidate passed the tolerance filter above.
        best = date_diff.idxmin()
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

    # --- 3) singleton (amount, day) matches on whatever is still unmatched ---
    used_stripe = {s_idx for s_idx, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
    rr_df = rr.reset_index()
    rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()
    stripe_counts = (
        stripe_left
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='stripe_count')
    )
    rr_counts = (
        rr_df
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='rr_count')
    )
    # Keys where exactly one row exists on each side can be paired safely.
    unique_keys = pd.merge(stripe_counts, rr_counts, on=['norm_amount', 'norm_date_norm'])
    unique_keys = unique_keys[
        (unique_keys['stripe_count'] == 1) & (unique_keys['rr_count'] == 1)
    ]
    for _, u in unique_keys.iterrows():
        amt = u['norm_amount']
        day = u['norm_date_norm']
        srow = stripe_left[
            (stripe_left['norm_amount'] == amt)
            & (stripe_left['norm_date_norm'] == day)
        ].iloc[0]
        rrow = rr_df[
            (rr_df['norm_amount'] == amt)
            & (rr_df['norm_date_norm'] == day)
        ].iloc[0]
        pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
        rr = rr.drop(rrow['idx_raisenow'])

    # --- build the merged DataFrame ---
    merged_rows = []
    for s_idx, r_idx in pairs:
        srow = stripe_charge.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # Drop overlapping keys so Stripe values win and no suffixes appear.
        for k in ['norm_name', 'norm_date', 'norm_email', 'norm_amount',
                  'norm_currency', 'idx_stripe']:
            rrow.pop(k, None)
        merged_rows.append({**srow, **rrow})

    starting_columns = ['norm_name', 'norm_date', 'norm_email',
                        'norm_amount', 'norm_currency', 'norm_zweck']
    if merged_rows:
        combined = pd.DataFrame(merged_rows)
        # Reorder so the most important columns come first.
        combined = pd.concat(
            [combined[starting_columns], combined.drop(columns=starting_columns)],
            axis=1
        )
    else:
        # No matches at all: keep the expected columns so downstream views
        # (and the column reorder above) stay valid instead of raising.
        combined = pd.DataFrame(columns=starting_columns)

    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s_idx for s_idx, _ in pairs}
        result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r_idx for _, r_idx in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
    elif table_name == 'export':
        used = {s_idx for s_idx, _ in pairs}
        stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
        result = pd.concat(
            [combined, stripe_only, stripe_adjustment, stripe_stripeFee],
            ignore_index=True
        )

        # Book the aggregated fees on the last day of the dataset's month,
        # one second before midnight.
        latest_date = pd.to_datetime(result['norm_date']).max()
        total_timestamp = (
            pd.Timestamp(year=latest_date.year, month=latest_date.month, day=1)
            + pd.offsets.MonthEnd(0)
        )
        total_timestamp = total_timestamp.replace(hour=23, minute=59, second=59, microsecond=0)
        fee_rows = [
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Buchungsgebühren',
             'norm_amount': total_stripe_charge_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Rückbuchungsgebühren',
             'norm_amount': total_stripe_refund_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Korrekturgebühren',
             'norm_amount': total_stripe_adjustment_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Stripe Gebühren',
             'norm_amount': total_stripe_stripeFee_fees * (-1)},
        ]
        result = pd.concat([result, pd.DataFrame(fee_rows)], ignore_index=True)

        # Backfill missing names from the Raisenow first/last name columns.
        for i, row in result.iterrows():
            if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
                result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()
        # Backfill missing e-mail addresses likewise.
        for i, row in result.iterrows():
            if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
                result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()

        result = result.rename(columns={
            'norm_name': 'name',
            'norm_date': 'booking_date',
            'norm_email': 'email',
            'norm_amount': 'amount',
            'norm_currency': 'currency',
            'norm_zweck': 'reference'
        })
        # Credit/debit marker derived from the signed amount.
        result['sign'] = result['amount'].apply(lambda x: 'CRDT' if x >= 0 else 'DBIT')
        result = result[[
            'booking_date', 'name', 'email', 'amount',
            'currency', 'sign', 'reference'
        ]]
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    return result.dropna(axis=1, how='all')


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Ingest one or more Stripe/Raisenow CSV or Excel exports.

    Each file is recognized by its leading columns, appended to the matching
    session-stored table, and de-duplicated by its primary-id column.
    """
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400
    for f in files:
        print('uploading file:', f.filename)
        raw = (
            pd.read_csv(f)
            if f.filename.lower().endswith('.csv')
            else pd.read_excel(f)
        )
        raw = raw.dropna(how='all')
        raw = raw.astype(object).replace({np.nan: None})
        cols = list(raw.columns)
        if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
            key = 'stripe_import'
            dedupe_col = 'ID'
        elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS:
            key = 'raisenow_import'
            dedupe_col = 'Identifikationsnummer'
        else:
            print('file does not match expected formats:', f.filename)
            return jsonify({
                "status": "error",
                "message": f"File '{f.filename}' does not match expected formats."
            }), 400
        existing = get_dataframe(key)
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)
        # Save back to the session as plain records (NaN -> None for JSON).
        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')
    return jsonify({'status': 'ok'})


@app.route('/get_table')
def get_table():
    """Return the requested table view as JSON columns + records."""
    table = request.args.get('table')
    print('get_table:', table)
    try:
        df = get_merged_df(table)
    except ValueError as exc:
        # An unknown view name is a client error, not a server crash.
        return jsonify({'status': 'error', 'message': str(exc)}), 400
    print('number of rows:', len(df))
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records')
    })


@app.route('/download_xlsx')
def download_xlsx():
    """Export every table view as one XLSX workbook (one sheet per view)."""
    sheets = {
        name: get_merged_df(name)
        for name in [
            'stripe_import', 'raisenow_import',
            'merged', 'stripe_only', 'raisenow_only', 'export'
        ]
    }
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name, df in sheets.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]
            # 1) Freeze the header row.
            worksheet.freeze_panes(1, 0)
            # 2) Autofilter across all columns (needs at least one column).
            if df.shape[1] > 0:
                worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
            # 3) Size columns from the first data row (skip empty sheets).
            if not df.empty:
                first_row = df.iloc[0].astype(str)
                for col_idx, cell_value in enumerate(first_row):
                    worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )


@app.route('/download_mt940')
def download_mt940():
    """Export the reconciled data as an MT940 bank-statement text file."""
    df = get_merged_df('export')
    mt940_string = generate_mt940(
        df,
        account_iban="11223344/55667788",
        transaction_ref="REFEXCELEXPORT",
        statement_number="00000",
        opening_balance=0.0,
        txn_code="NMSC",
        txn_ref="NONREF",
        info_prefix="169?00RAISENOW??20"
    )
    output = BytesIO()
    output.write(mt940_string.encode('utf-8'))
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='export_mt940.txt',
        mimetype='text/plain'
    )


@app.route('/clear_session', methods=['POST'])
def clear_session():
    """Clear all session data and reset server-side stored DataFrames."""
    session.clear()
    return jsonify({'status': 'session cleared'})


def wrap_string(text: str, max_length: int = 65) -> str:
    """Hard-wrap ``text`` at ``max_length`` characters with CRLF breaks.

    Existing line breaks are preserved: each line is wrapped separately.
    """
    wrapped_lines = []
    # Split on any existing newline style ("\n", "\r\n", ...).
    for line in text.splitlines():
        # Chop each line into max_length-sized pieces.
        for i in range(0, len(line), max_length):
            wrapped_lines.append(line[i:i + max_length])
    # Re-join with Windows-style breaks, as MT940 expects.
    return "\r\n".join(wrapped_lines)


def generate_mt940(df: pd.DataFrame,
                   account_iban: str,
                   transaction_ref: str,
                   statement_number: str = None,
                   opening_balance: float = 0.0,
                   txn_code: str = 'NMSC',
                   txn_ref: str = 'NONREF',
                   info_prefix: str = None) -> str:
    """Generate an MT940 text statement from an export DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Required columns: 'booking_date' (datetime or 'YYYY-MM-DD' string),
        'amount' (float), 'currency' (str, e.g. 'EUR'), 'sign' ('CRDT' or
        'DBIT'), 'reference' (goes into the info_prefix section of tag 86)
        and 'name' (appended after "?32" in tag 86).
    account_iban : str
        Written verbatim into tag 25 (e.g. "11223344/55667788").
    transaction_ref : str
        Tag 20 (e.g. "REFEXCELEXPORT").
    statement_number : str, optional
        Used verbatim for tag 28C; falls back to "{transaction_ref}/1".
    opening_balance : float, optional
        Starting balance for tag 60F.
    txn_code : str, optional
        The booking code in the :61: line (default "NMSC").
    txn_ref : str, optional
        The literal reference after that code (default "NONREF").
    info_prefix : str, optional
        If set (e.g. "169?00RAISENOW??20"), tag 86 becomes
        "{prefix}{reference} ?32{name}"; otherwise a simple space-join of
        name and reference is used.

    Returns
    -------
    str
        The full statement with CRLF line endings, wrapped at 65 chars.

    Raises
    ------
    ValueError
        If ``df`` contains no rows (an MT940 needs at least one booking).
    """
    # Normalize & sort chronologically.
    df2 = df.copy()
    if df2.empty:
        raise ValueError('generate_mt940 requires at least one transaction row')
    df2['booking_date'] = pd.to_datetime(df2['booking_date'])
    df2.sort_values('booking_date', inplace=True)

    # The statement assumes a single constant currency.
    currency = df2['currency'].iat[0]

    lines = []
    # Header.
    lines.append(f":20:{transaction_ref}")
    lines.append(f":25:{account_iban}")
    if statement_number is None:
        lines.append(f":28C:{transaction_ref}/1")
    else:
        lines.append(f":28C:{statement_number}")

    # Opening balance (tag 60F), amount with decimal comma.
    first_dt = df2['booking_date'].iat[0]
    ob_sign = 'C' if opening_balance >= 0 else 'D'
    ob_str = f"{abs(opening_balance):.2f}".replace('.', ',')
    lines.append(f":60F:{ob_sign}{first_dt.strftime('%y%m%d')}{currency}{ob_str}")

    # Transactions.
    for _, row in df2.iterrows():
        bd = row['booking_date']
        sign = 'C' if row['sign'] == 'CRDT' else 'D'
        amt_str = f"{abs(row['amount']):.2f}".replace('.', ',')
        # :61:YYMMDD MMDD [C|D] amount code reference
        lines.append(
            f":61:{bd.strftime('%y%m%d')}"
            f"{bd.strftime('%m%d')}"
            f"{sign}{amt_str}"
            f"{txn_code}{txn_ref}"
        )
        # :86: either the structured prefix form or a simple fallback.
        raw_rem = row.get('reference', '')
        raw_name = row.get('name', '')
        rem = '' if pd.isna(raw_rem) else str(raw_rem)
        name = '' if pd.isna(raw_name) else str(raw_name)
        if info_prefix:
            lines.append(f":86:{info_prefix}{rem} ?32{name}")
        else:
            info = " ".join(filter(None, [name, rem]))
            lines.append(f":86:{info}")

    # Closing balance: credits add, debits subtract. Use magnitudes so a
    # signed 'amount' column cannot be double-counted through the 'sign'
    # column (the export stores negative amounts for DBIT rows).
    net_mv = sum(
        abs(row['amount']) if row['sign'] == 'CRDT' else -abs(row['amount'])
        for _, row in df2.iterrows()
    )
    closing = opening_balance + net_mv
    cb_sign = 'C' if closing >= 0 else 'D'
    cb_str = f"{abs(closing):.2f}".replace('.', ',')
    last_dt = df2['booking_date'].iat[-1]
    lines.append(f":62F:{cb_sign}{last_dt.strftime('%y%m%d')}{currency}{cb_str}")

    file_str = "\r\n".join(lines)
    return wrap_string(file_str)


if __name__ == '__main__':
    # Development server only; use a WSGI server in production.
    app.run(debug=True)