From 2b8d7c98657600e448183fea1c2c568a66e3ae02 Mon Sep 17 00:00:00 2001 From: lelo Date: Fri, 23 May 2025 23:51:48 +0200 Subject: [PATCH] update a lot --- app/app.py | 301 ++++++++++++++++++++++++++++++++++----- app/templates/index.html | 1 + 2 files changed, 268 insertions(+), 34 deletions(-) diff --git a/app/app.py b/app/app.py index 75556f5..58414f2 100644 --- a/app/app.py +++ b/app/app.py @@ -3,6 +3,7 @@ import pandas as pd import numpy as np from io import BytesIO from flask_session import Session +from datetime import datetime app = Flask(__name__) app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key @@ -37,35 +38,72 @@ def get_merged_df(table_name): """ # --- load & normalize Stripe --- - stripe = get_dataframe('stripe_import') - if not stripe.empty: - stripe = ( - stripe - .query("Type == 'Charge'") - .copy() - ) - else: - return stripe + stripe_import = get_dataframe('stripe_import') - stripe['idx_stripe'] = stripe.index - stripe['norm_date'] = pd.to_datetime(stripe['Created'], format='%Y-%m-%d %H:%M') - stripe['norm_amount'] = stripe['Amount'].astype(str).str.replace(',', '.').astype(float) - stripe['norm_email'] = stripe['Customer Email'].astype(str) - stripe['norm_name'] = stripe.apply( + if stripe_import.empty: + return pd.DataFrame() + + stripe_charge = ( + stripe_import + .query("Type == 'Charge'") + .copy() + ) + + stripe_adjustment = ( + stripe_import + .query("Type == 'Adjustment'") + .copy() + ) + + stripe_refund = ( + stripe_import + .query("Type == 'Payment Failure Refund'") + .copy() + ) + + stripe_stripeFee = ( + stripe_import + .query("Type == 'Stripe Fee'") + .copy() + ) + + # sum up the fees + total_stripe_charge_fees = stripe_charge['Fees'].astype(str).str.replace(',', '.').astype(float).sum() + total_stripe_adjustment_fees = stripe_adjustment['Fees'].astype(str).str.replace(',', '.').astype(float).sum() + total_stripe_refund_fees = stripe_refund['Fees'].astype(str).str.replace(',', '.').astype(float).sum() + total_stripe_stripeFee_fees = stripe_stripeFee['Fees'].astype(str).str.replace(',', '.').astype(float).sum() + + stripe_adjustment['norm_date'] = pd.to_datetime(stripe_adjustment['Created'], format='%Y-%m-%d %H:%M') + stripe_adjustment['norm_amount'] = stripe_adjustment['Amount'].astype(str).str.replace(',', '.').astype(float) + stripe_adjustment['norm_zweck'] = "Korrekturen" + + stripe_stripeFee['norm_date'] = pd.to_datetime(stripe_stripeFee['Created'], format='%Y-%m-%d %H:%M') + stripe_stripeFee['norm_amount'] = stripe_stripeFee['Amount'].astype(str).str.replace(',', '.').astype(float) + stripe_stripeFee['norm_zweck'] = "Stripe" + + # Extract the “py_…” token from stripe_refund description + stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)') + # Build a list of all extracted py_ IDs + pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist() + # Remove from stripe_charge any row whose ID is in that list + stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)] + + stripe_charge['idx_stripe'] = stripe_charge.index + stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M') + stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float) + stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str) + stripe_charge['norm_name'] = stripe_charge.apply( lambda r: r['Customer Name'] or r['Details'], axis=1 ) # --- load & normalize Raisenow --- - raisenow = get_dataframe('raisenow_import') - if not raisenow.empty: - raisenow = ( - raisenow - .query("Zahlungsmethode != 'paypal'") - .query("Status == 'succeeded'") - .copy() - ) - else: - return raisenow + raisenow_import = get_dataframe('raisenow_import') + raisenow = ( + raisenow_import + .query("Zahlungsmethode != 'paypal'") + .query("Status == 'succeeded'") + .copy() + ) raisenow['idx_raisenow'] = raisenow.index raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M') @@ -88,19 +126,22 @@ def get_merged_df(table_name): # --- return raw tables if requested --- if table_name == 'stripe_import': - return stripe.dropna(axis=1, how='all') + return stripe_import.dropna(axis=1, how='all') if table_name == 'raisenow_import': - return raisenow.dropna(axis=1, how='all') + return raisenow_import.dropna(axis=1, how='all') # --- 1) Greedy exact same-day matches --- pairs = [] # index Raisenow rows for fast lookup + dropping rr = raisenow.set_index('idx_raisenow') - for _, s in stripe.iterrows(): + for _, s in stripe_charge.iterrows(): # filter candidates by amount & name cand = rr[ (rr['norm_amount'] == s['norm_amount']) & - (rr['norm_name'] == s['norm_name']) + ( + (rr['norm_name'] == s['norm_name']) | + (rr['norm_email'] == s['norm_email']) + ) ].copy() if cand.empty: continue @@ -115,11 +156,14 @@ def get_merged_df(table_name): # --- 2) Greedy fuzzy ±1-day matches on remaining rows --- used_stripe = {s for s, _ in pairs} - stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy() + stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy() for _, s in stripe_left.iterrows(): cand = rr[ (rr['norm_amount'] == s['norm_amount']) & - (rr['norm_name'] == s['norm_name']) + ( + (rr['norm_name'] == s['norm_name']) | + (rr['norm_email'] == s['norm_email']) + ) ].copy() if cand.empty: continue @@ -132,10 +176,72 @@ def get_merged_df(table_name): pairs.append((int(s['idx_stripe']), int(best))) rr = rr.drop(best) - # --- build the merged DataFrame without suffixes --- + + + + + + # --- 3) Unique amount & exact-date-only matches --- + # Recompute “leftovers” now after steps 1+2 + used_stripe = {s for s, _ in pairs} + stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy() + + # Prep for grouping + stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize() + rr_df = rr.reset_index() + rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize() + + # Count how many per (amount, date) in each + stripe_counts = ( + stripe_left + .groupby(['norm_amount','norm_date_norm']) + .size() + .reset_index(name='stripe_count') + ) + rr_counts = ( + rr_df + .groupby(['norm_amount','norm_date_norm']) + .size() + .reset_index(name='rr_count') + ) + + # Find the pairs where both counts == 1 + unique_keys = pd.merge(stripe_counts, rr_counts, + on=['norm_amount','norm_date_norm']) + unique_keys = unique_keys[ + (unique_keys['stripe_count'] == 1) & + (unique_keys['rr_count'] == 1) + ] + + # Pull those exact singletons through + for _, u in unique_keys.iterrows(): + amt = u['norm_amount'] + d = u['norm_date_norm'] + srow = stripe_left[ + (stripe_left['norm_amount'] == amt) & + (stripe_left['norm_date_norm'] == d) + ].iloc[0] + rrow = rr_df[ + (rr_df['norm_amount'] == amt) & + (rr_df['norm_date_norm'] == d) + ].iloc[0] + pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow']))) + rr = rr.drop(rrow['idx_raisenow']) + + + + + + + + + + + + # --- build the merged DataFrame --- merged_rows = [] for s_idx, r_idx in pairs: - srow = stripe.loc[s_idx].to_dict() + srow = stripe_charge.loc[s_idx].to_dict() rrow = raisenow.loc[r_idx].to_dict() # drop any overlapping keys so we never get suffixes for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']: @@ -146,15 +252,47 @@ def get_merged_df(table_name): combined = pd.DataFrame(merged_rows) + starting_columns = ['norm_name', 'norm_date', 'norm_email', 'norm_amount', 'norm_zweck'] + # reorder columns to put the most important ones first + combined = pd.concat([ + combined[starting_columns], + combined.drop(columns=starting_columns) + ], axis=1) + + # --- slice out the requested view --- if table_name == 'merged': result = combined elif table_name == 'stripe_only': used = {s for s, _ in pairs} - result = stripe[~stripe['idx_stripe'].isin(used)] + result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)] elif table_name == 'raisenow_only': used = {r for _, r in pairs} result = raisenow[~raisenow['idx_raisenow'].isin(used)] + elif table_name == 'export': + used = {s for s, _ in pairs} + stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)] + result = pd.concat([combined, stripe_only, stripe_adjustment, stripe_stripeFee], ignore_index=True) + # add the Stripe fees to the end of the table + new_rows = [ + {'norm_zweck': 'Buchungsgebühren', 'norm_amount': total_stripe_charge_fees * (-1)}, + {'norm_zweck': 'Rückbuchungsgebühren', 'norm_amount': total_stripe_refund_fees * (-1)}, + {'norm_zweck': 'Korrekturgebühren', 'norm_amount': total_stripe_adjustment_fees * (-1)}, + {'norm_zweck': 'Stripe Gebühren', 'norm_amount': total_stripe_stripeFee_fees * (-1)} + ] + new_rows_df = pd.DataFrame(new_rows) + result = pd.concat([result, new_rows_df], ignore_index=True) + + # fix empty name values + for i, row in result.iterrows(): + if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')): + result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip() + + # fix empty email values + for i, row in result.iterrows(): + if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')): + result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip() + else: raise ValueError(f"Unknown table_name '{table_name}'") @@ -220,7 +358,8 @@ def download(): 'raisenow_import', 'merged', 'stripe_only', - 'raisenow_only' + 'raisenow_only', + 'export' ] } @@ -259,5 +398,99 @@ def clear_session(): session.clear() return jsonify({'status': 'session cleared'}) +def export_to_special_format( + df: pd.DataFrame, + reference: str, + account: str, + statement_number: int, + opening_date: datetime, + opening_balance: float, + currency: str, + closing_date: datetime = None, + closing_balance: float = None +) -> str: + """ + Convert a DataFrame of transactions into the special SWIFT-like file format. + + Parameters + ---------- + df : pd.DataFrame + Must contain columns: + - 'value_date' (datetime) + - 'booking_date' (datetime) + - 'dc' (str): 'C' for credit, 'D' for debit + - 'amount' (float) + - optional 'transaction_code' (str) + - optional 'bank_reference' (str) + - 'narrative' (str) + reference : str + Message reference for :20: + account : str + Account number for :25: + statement_number : int + Statement sequence for :28C: (will be zero-padded to 5 digits) + opening_date : datetime + Opening balance date + opening_balance : float + Opening balance amount (positive) + currency : str + Three-letter currency code (e.g. 'EUR') + closing_date : datetime, optional + Closing balance date + closing_balance : float, optional + Closing balance amount (positive) + + Returns + ------- + str + The formatted file content. + """ + lines = [] + # Header + lines.append(f":20:{reference}") + lines.append(f":25:{account}") + lines.append(f":28C:{statement_number:05d}") + + # Opening balance :60F: + od = opening_date.strftime('%y%m%d') + ob = f"{opening_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '') + lines.append(f":60F:C{od}{currency}{ob}") + + # Transactions + for _, row in df.iterrows(): + vd = row['value_date'].strftime('%y%m%d') + bd = row['booking_date'].strftime('%m%d') + dc = row['dc'] + amt = f"{row['amount']:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '') + tcode = row.get('transaction_code', '') + bref = row.get('bank_reference', '') + lines.append(f":61:{vd}{bd}{dc}{amt}{tcode}{bref}") + lines.append(f":86:{row['narrative']}") + + # Closing balance :62F: + if closing_date and closing_balance is not None: + cd = closing_date.strftime('%y%m%d') + cb = f"{closing_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '') + lines.append(f":62F:C{cd}{currency}{cb}") + + return "\n".join(lines) + +# Example usage: +# df = pd.DataFrame([...]) +# content = export_to_special_format( +# df, +# reference='REFEXCELEXPORT', +# account='11223344/55667788', +# statement_number=0, +# opening_date=datetime(2025,3,6), +# opening_balance=0.00, +# currency='EUR', +# closing_date=datetime(2025,3,6), +# closing_balance=12048.71 +# ) +# with open('statement.txt', 'w') as f: +# f.write(content) + + if __name__ == '__main__': app.run(debug=True) diff --git a/app/templates/index.html b/app/templates/index.html index 999ba42..5647048 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -49,6 +49,7 @@ +