diff --git a/app/app.py b/app/app.py
index 75556f5..58414f2 100644
--- a/app/app.py
+++ b/app/app.py
@@ -3,6 +3,7 @@ import pandas as pd
import numpy as np
from io import BytesIO
from flask_session import Session
+from datetime import datetime
+from typing import Optional
app = Flask(__name__)
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key
@@ -37,35 +38,72 @@ def get_merged_df(table_name):
"""
# --- load & normalize Stripe ---
- stripe = get_dataframe('stripe_import')
- if not stripe.empty:
- stripe = (
- stripe
- .query("Type == 'Charge'")
- .copy()
- )
- else:
- return stripe
+ stripe_import = get_dataframe('stripe_import')
- stripe['idx_stripe'] = stripe.index
- stripe['norm_date'] = pd.to_datetime(stripe['Created'], format='%Y-%m-%d %H:%M')
- stripe['norm_amount'] = stripe['Amount'].astype(str).str.replace(',', '.').astype(float)
- stripe['norm_email'] = stripe['Customer Email'].astype(str)
- stripe['norm_name'] = stripe.apply(
+ if stripe_import.empty:
+ return pd.DataFrame()
+
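+ # split the Stripe export into one frame per transaction type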
+ stripe_charge = (
+ stripe_import
+ .query("Type == 'Charge'")
+ .copy()
+ )
+
+ stripe_adjustment = (
+ stripe_import
+ .query("Type == 'Adjustment'")
+ .copy()
+ )
+
+ stripe_refund = (
+ stripe_import
+ .query("Type == 'Payment Failure Refund'")
+ .copy()
+ )
+
+ stripe_stripeFee = (
+ stripe_import
+ .query("Type == 'Stripe Fee'")
+ .copy()
+ )
+
+ # sum up the fees per transaction type (comma decimal separators are normalized before summing)
+ total_stripe_charge_fees = stripe_charge['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+ total_stripe_adjustment_fees = stripe_adjustment['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+ total_stripe_refund_fees = stripe_refund['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+ total_stripe_stripeFee_fees = stripe_stripeFee['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+
+ stripe_adjustment['norm_date'] = pd.to_datetime(stripe_adjustment['Created'], format='%Y-%m-%d %H:%M')
+ stripe_adjustment['norm_amount'] = stripe_adjustment['Amount'].astype(str).str.replace(',', '.').astype(float)
+ stripe_adjustment['norm_zweck'] = "Korrekturen"
+
+ stripe_stripeFee['norm_date'] = pd.to_datetime(stripe_stripeFee['Created'], format='%Y-%m-%d %H:%M')
+ stripe_stripeFee['norm_amount'] = stripe_stripeFee['Amount'].astype(str).str.replace(',', '.').astype(float)
+ stripe_stripeFee['norm_zweck'] = "Stripe"
+
+ # Extract the "py_..." payment ID token from the stripe_refund description
+ stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
+ # Build a list of all extracted py_ IDs
+ pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
+ # Remove from stripe_charge any row whose ID is in that list
+ stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]
+
+ stripe_charge['idx_stripe'] = stripe_charge.index
+ stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M')
+ stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float)
+ stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str)
+ stripe_charge['norm_name'] = stripe_charge.apply(
lambda r: r['Customer Name'] or r['Details'], axis=1
)
# --- load & normalize Raisenow ---
- raisenow = get_dataframe('raisenow_import')
- if not raisenow.empty:
- raisenow = (
- raisenow
- .query("Zahlungsmethode != 'paypal'")
- .query("Status == 'succeeded'")
- .copy()
- )
- else:
- return raisenow
+ raisenow_import = get_dataframe('raisenow_import')
+ raisenow = (
+ raisenow_import
+ .query("Zahlungsmethode != 'paypal'")
+ .query("Status == 'succeeded'")
+ .copy()
+ )
raisenow['idx_raisenow'] = raisenow.index
raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
@@ -88,19 +126,22 @@ def get_merged_df(table_name):
# --- return raw tables if requested ---
if table_name == 'stripe_import':
- return stripe.dropna(axis=1, how='all')
+ return stripe_import.dropna(axis=1, how='all')
if table_name == 'raisenow_import':
- return raisenow.dropna(axis=1, how='all')
+ return raisenow_import.dropna(axis=1, how='all')
# --- 1) Greedy exact same-day matches ---
pairs = []
# index Raisenow rows for fast lookup + dropping
rr = raisenow.set_index('idx_raisenow')
- for _, s in stripe.iterrows():
+ for _, s in stripe_charge.iterrows():
# filter candidates by amount & name
cand = rr[
(rr['norm_amount'] == s['norm_amount']) &
- (rr['norm_name'] == s['norm_name'])
+ (
+ (rr['norm_name'] == s['norm_name']) |
+ (rr['norm_email'] == s['norm_email'])
+ )
].copy()
if cand.empty:
continue
@@ -115,11 +156,14 @@ def get_merged_df(table_name):
# --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
used_stripe = {s for s, _ in pairs}
- stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy()
+ stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
for _, s in stripe_left.iterrows():
cand = rr[
(rr['norm_amount'] == s['norm_amount']) &
- (rr['norm_name'] == s['norm_name'])
+ (
+ (rr['norm_name'] == s['norm_name']) |
+ (rr['norm_email'] == s['norm_email'])
+ )
].copy()
if cand.empty:
continue
@@ -132,10 +176,72 @@ def get_merged_df(table_name):
pairs.append((int(s['idx_stripe']), int(best)))
rr = rr.drop(best)
- # --- build the merged DataFrame without suffixes ---
+
+ # --- 3) Unique amount & exact-date-only matches ---
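+ # An (amount, calendar day) combination that is unique on both sides is paired even when name and email differ.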
+ # Recompute the leftover Stripe charges after steps 1 and 2
+ used_stripe = {s for s, _ in pairs}
+ stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
+
+ # Prep for grouping
+ stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
+ rr_df = rr.reset_index()
+ rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()
+
+ # Count how many per (amount, date) in each
+ stripe_counts = (
+ stripe_left
+ .groupby(['norm_amount','norm_date_norm'])
+ .size()
+ .reset_index(name='stripe_count')
+ )
+ rr_counts = (
+ rr_df
+ .groupby(['norm_amount','norm_date_norm'])
+ .size()
+ .reset_index(name='rr_count')
+ )
+
+ # Find the pairs where both counts == 1
+ unique_keys = pd.merge(stripe_counts, rr_counts,
+ on=['norm_amount','norm_date_norm'])
+ unique_keys = unique_keys[
+ (unique_keys['stripe_count'] == 1) &
+ (unique_keys['rr_count'] == 1)
+ ]
+
+ # Pull those exact singletons through
+ for _, u in unique_keys.iterrows():
+ amt = u['norm_amount']
+ d = u['norm_date_norm']
+ srow = stripe_left[
+ (stripe_left['norm_amount'] == amt) &
+ (stripe_left['norm_date_norm'] == d)
+ ].iloc[0]
+ rrow = rr_df[
+ (rr_df['norm_amount'] == amt) &
+ (rr_df['norm_date_norm'] == d)
+ ].iloc[0]
+ pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
+ rr = rr.drop(rrow['idx_raisenow'])
+
+ # --- build the merged DataFrame ---
merged_rows = []
for s_idx, r_idx in pairs:
- srow = stripe.loc[s_idx].to_dict()
+ srow = stripe_charge.loc[s_idx].to_dict()
rrow = raisenow.loc[r_idx].to_dict()
# drop any overlapping keys so we never get suffixes
for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']:
@@ -146,15 +252,47 @@ def get_merged_df(table_name):
combined = pd.DataFrame(merged_rows)
+ starting_columns = ['norm_name', 'norm_date', 'norm_email', 'norm_amount', 'norm_zweck']
+ # reorder columns to put the most important ones first
+ combined = pd.concat([
+ combined[starting_columns],
+ combined.drop(columns=starting_columns)
+ ], axis=1)
+
+
# --- slice out the requested view ---
if table_name == 'merged':
result = combined
elif table_name == 'stripe_only':
used = {s for s, _ in pairs}
- result = stripe[~stripe['idx_stripe'].isin(used)]
+ result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
elif table_name == 'raisenow_only':
used = {r for _, r in pairs}
result = raisenow[~raisenow['idx_raisenow'].isin(used)]
+ elif table_name == 'export':
+ used = {s for s, _ in pairs}
+ stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
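+ # export view: matched rows, then unmatched Stripe charges, adjustments and Stripe fee rows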
+ result = pd.concat([combined, stripe_only, stripe_adjustment, stripe_stripeFee], ignore_index=True)
+ # add the Stripe fees to the end of the table
+ new_rows = [
+ {'norm_zweck': 'Buchungsgebühren', 'norm_amount': total_stripe_charge_fees * (-1)},
+ {'norm_zweck': 'Rückbuchungsgebühren', 'norm_amount': total_stripe_refund_fees * (-1)},
+ {'norm_zweck': 'Korrekturgebühren', 'norm_amount': total_stripe_adjustment_fees * (-1)},
+ {'norm_zweck': 'Stripe Gebühren', 'norm_amount': total_stripe_stripeFee_fees * (-1)}
+ ]
+ new_rows_df = pd.DataFrame(new_rows)
+ result = pd.concat([result, new_rows_df], ignore_index=True)
+
+ # fill missing norm_name values from the Vorname/Nachname columns
+ for i, row in result.iterrows():
+ if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
+ result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()
+
+ # fill missing norm_email values from the E-Mail-Adresse column
+ for i, row in result.iterrows():
+ if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
+ result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()
+
else:
raise ValueError(f"Unknown table_name '{table_name}'")
@@ -220,7 +358,8 @@ def download():
'raisenow_import',
'merged',
'stripe_only',
- 'raisenow_only'
+ 'raisenow_only',
+ 'export'
]
}
@@ -259,5 +398,99 @@ def clear_session():
session.clear()
return jsonify({'status': 'session cleared'})
+def export_to_special_format(
+ df: pd.DataFrame,
+ reference: str,
+ account: str,
+ statement_number: int,
+ opening_date: datetime,
+ opening_balance: float,
+ currency: str,
+ closing_date: Optional[datetime] = None,
+ closing_balance: Optional[float] = None
+) -> str:
+ """
+ Convert a DataFrame of transactions into the special SWIFT-like file format.
+
+ Parameters
+ ----------
+ df : pd.DataFrame
+ Must contain columns:
+ - 'value_date' (datetime)
+ - 'booking_date' (datetime)
+ - 'dc' (str): 'C' for credit, 'D' for debit
+ - 'amount' (float)
+ - optional 'transaction_code' (str)
+ - optional 'bank_reference' (str)
+ - 'narrative' (str)
+ reference : str
+ Message reference for :20:
+ account : str
+ Account number for :25:
+ statement_number : int
+ Statement sequence for :28C: (will be zero-padded to 5 digits)
+ opening_date : datetime
+ Opening balance date
+ opening_balance : float
+ Opening balance amount (positive)
+ currency : str
+ Three-letter currency code (e.g. 'EUR')
+ closing_date : datetime, optional
+ Closing balance date
+ closing_balance : float, optional
+ Closing balance amount (positive)
+
+ Returns
+ -------
+ str
+ The formatted file content.
+ """
+ lines = []
+ # Header
+ lines.append(f":20:{reference}")
+ lines.append(f":25:{account}")
+ lines.append(f":28C:{statement_number:05d}")
+
+ # Opening balance :60F:
+ od = opening_date.strftime('%y%m%d')
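+ # render the amount with a decimal comma and no thousands separators (e.g. 12048.71 -> '12048,71')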
+ ob = f"{opening_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+ lines.append(f":60F:C{od}{currency}{ob}")
+
+ # Transactions
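+ # each transaction yields a :61: line (value date, booking date, D/C, amount, codes) followed by a :86: narrative line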
+ for _, row in df.iterrows():
+ vd = row['value_date'].strftime('%y%m%d')
+ bd = row['booking_date'].strftime('%m%d')
+ dc = row['dc']
+ amt = f"{row['amount']:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+ tcode = row.get('transaction_code', '')
+ bref = row.get('bank_reference', '')
+ lines.append(f":61:{vd}{bd}{dc}{amt}{tcode}{bref}")
+ lines.append(f":86:{row['narrative']}")
+
+ # Closing balance :62F:
+ if closing_date and closing_balance is not None:
+ cd = closing_date.strftime('%y%m%d')
+ cb = f"{closing_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+ lines.append(f":62F:C{cd}{currency}{cb}")
+
+ return "\n".join(lines)
+
+# Example usage:
+# df = pd.DataFrame([...])
+# content = export_to_special_format(
+# df,
+# reference='REFEXCELEXPORT',
+# account='11223344/55667788',
+# statement_number=0,
+# opening_date=datetime(2025,3,6),
+# opening_balance=0.00,
+# currency='EUR',
+# closing_date=datetime(2025,3,6),
+# closing_balance=12048.71
+# )
+# with open('statement.txt', 'w') as f:
+# f.write(content)
+
+
if __name__ == '__main__':
app.run(debug=True)
diff --git a/app/templates/index.html b/app/templates/index.html
index 999ba42..5647048 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -49,6 +49,7 @@
+