update a lot

2025-05-23 23:51:48 +02:00 · 2025-05-23 23:51:48 +02:00 · 2b8d7c9865
commit 2b8d7c9865
parent de75260fe7
2 changed files with 268 additions and 34 deletions
--- a/app/app.py
+++ b/app/app.py
@ -3,6 +3,7 @@ import pandas as pd
 import numpy as np
 from io import BytesIO
 from flask_session import Session
+from datetime import datetime

 app = Flask(__name__)
 app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw"  # replace with a secure random key
@ -37,35 +38,72 @@ def get_merged_df(table_name):
    """

    # --- load & normalize Stripe ---
-    stripe = get_dataframe('stripe_import')
-    if not stripe.empty:
-        stripe = (
-            stripe
-            .query("Type == 'Charge'")
-            .copy()
-        )
-    else:
-        return stripe
+    stripe_import = get_dataframe('stripe_import')
    
-    stripe['idx_stripe']  = stripe.index
-    stripe['norm_date']   = pd.to_datetime(stripe['Created'],    format='%Y-%m-%d %H:%M')
-    stripe['norm_amount'] = stripe['Amount'].astype(str).str.replace(',', '.').astype(float)
-    stripe['norm_email']  = stripe['Customer Email'].astype(str)
-    stripe['norm_name']   = stripe.apply(
+    if stripe_import.empty:
+        return pd.DataFrame()
+
+    stripe_charge = (
+        stripe_import
+        .query("Type == 'Charge'")
+        .copy()
+    )
+
+    stripe_adjustment = (
+        stripe_import
+        .query("Type == 'Adjustment'")
+        .copy()
+    )
+    
+    stripe_refund = (
+        stripe_import
+        .query("Type == 'Payment Failure Refund'")
+        .copy()
+    )
+
+    stripe_stripeFee = (
+        stripe_import
+        .query("Type == 'Stripe Fee'")
+        .copy()
+    )
+
+    # sum up the fees
+    total_stripe_charge_fees     = stripe_charge['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+    total_stripe_adjustment_fees = stripe_adjustment['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+    total_stripe_refund_fees     = stripe_refund['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+    total_stripe_stripeFee_fees  = stripe_stripeFee['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
+
+    stripe_adjustment['norm_date']   = pd.to_datetime(stripe_adjustment['Created'],    format='%Y-%m-%d %H:%M')
+    stripe_adjustment['norm_amount'] = stripe_adjustment['Amount'].astype(str).str.replace(',', '.').astype(float)
+    stripe_adjustment['norm_zweck'] = "Korrekturen"
+
+    stripe_stripeFee['norm_date']   = pd.to_datetime(stripe_stripeFee['Created'],    format='%Y-%m-%d %H:%M')
+    stripe_stripeFee['norm_amount'] = stripe_stripeFee['Amount'].astype(str).str.replace(',', '.').astype(float)
+    stripe_stripeFee['norm_zweck'] = "Stripe"
+
+    # Extract the “py_…” token from stripe_refund description
+    stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
+    # Build a list of all extracted py_ IDs
+    pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
+    # Remove from stripe_charge any row whose ID is in that list
+    stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]
+
+    stripe_charge['idx_stripe']  = stripe_charge.index
+    stripe_charge['norm_date']   = pd.to_datetime(stripe_charge['Created'],    format='%Y-%m-%d %H:%M')
+    stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float)
+    stripe_charge['norm_email']  = stripe_charge['Customer Email'].fillna('').astype(str)
+    stripe_charge['norm_name']   = stripe_charge.apply(
        lambda r: r['Customer Name'] or r['Details'], axis=1
    )
    
    # --- load & normalize Raisenow ---
-    raisenow = get_dataframe('raisenow_import')
-    if not raisenow.empty:
-        raisenow = (
-            raisenow
-            .query("Zahlungsmethode != 'paypal'")
-            .query("Status == 'succeeded'")
-            .copy()
-        )
-    else:
-        return raisenow
+    raisenow_import = get_dataframe('raisenow_import')
+    raisenow = (
+        raisenow_import
+        .query("Zahlungsmethode != 'paypal'")
+        .query("Status == 'succeeded'")
+        .copy()
+    )
    
    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date']    = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
@ -88,19 +126,22 @@ def get_merged_df(table_name):

    # --- return raw tables if requested ---
    if table_name == 'stripe_import':
-        return stripe.dropna(axis=1, how='all')
+        return stripe_import.dropna(axis=1, how='all')
    if table_name == 'raisenow_import':
-        return raisenow.dropna(axis=1, how='all')
+        return raisenow_import.dropna(axis=1, how='all')

    # --- 1) Greedy exact same-day matches ---
    pairs = []
    # index Raisenow rows for fast lookup + dropping
    rr = raisenow.set_index('idx_raisenow')
-    for _, s in stripe.iterrows():
+    for _, s in stripe_charge.iterrows():
        # filter candidates by amount & name
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
-            (rr['norm_name']   == s['norm_name'])
+            (
+                (rr['norm_name']  == s['norm_name']) |
+                (rr['norm_email'] == s['norm_email'])
+            )
        ].copy()
        if cand.empty:
            continue
@ -115,11 +156,14 @@ def get_merged_df(table_name):

    # --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
    used_stripe = {s for s, _ in pairs}
-    stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy()
+    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
-            (rr['norm_name']   == s['norm_name'])
+            (
+                (rr['norm_name']  == s['norm_name']) |
+                (rr['norm_email'] == s['norm_email'])
+            )
        ].copy()
        if cand.empty:
            continue
@ -132,10 +176,72 @@ def get_merged_df(table_name):
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

-    # --- build the merged DataFrame without suffixes ---
+
+
+
+
+
+    # --- 3) Unique amount & exact-date-only matches ---
+    # Recompute “leftovers” now after steps 1+2
+    used_stripe = {s for s, _ in pairs}
+    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
+
+    # Prep for grouping
+    stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
+    rr_df = rr.reset_index()
+    rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()
+
+    # Count how many per (amount, date) in each
+    stripe_counts = (
+        stripe_left
+        .groupby(['norm_amount','norm_date_norm'])
+        .size()
+        .reset_index(name='stripe_count')
+    )
+    rr_counts = (
+        rr_df
+        .groupby(['norm_amount','norm_date_norm'])
+        .size()
+        .reset_index(name='rr_count')
+    )
+
+    # Find the pairs where both counts == 1
+    unique_keys = pd.merge(stripe_counts, rr_counts,
+                        on=['norm_amount','norm_date_norm'])
+    unique_keys = unique_keys[
+        (unique_keys['stripe_count'] == 1) &
+        (unique_keys['rr_count']     == 1)
+    ]
+
+    # Pull those exact singletons through
+    for _, u in unique_keys.iterrows():
+        amt = u['norm_amount']
+        d   = u['norm_date_norm']
+        srow = stripe_left[
+            (stripe_left['norm_amount'] == amt) &
+            (stripe_left['norm_date_norm'] == d)
+        ].iloc[0]
+        rrow = rr_df[
+            (rr_df['norm_amount'] == amt) &
+            (rr_df['norm_date_norm'] == d)
+        ].iloc[0]
+        pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
+        rr = rr.drop(rrow['idx_raisenow'])
+
+
+
+
+
+
+
+
+
+
+
+    # --- build the merged DataFrame ---
    merged_rows = []
    for s_idx, r_idx in pairs:
-        srow = stripe.loc[s_idx].to_dict()
+        srow = stripe_charge.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # drop any overlapping keys so we never get suffixes
        for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']:
@ -146,15 +252,47 @@ def get_merged_df(table_name):

    combined = pd.DataFrame(merged_rows)

+    starting_columns = ['norm_name', 'norm_date', 'norm_email', 'norm_amount', 'norm_zweck']
+    # reorder columns to put the most important ones first
+    combined = pd.concat([
+        combined[starting_columns],
+        combined.drop(columns=starting_columns)
+    ], axis=1)
+    
+
    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s for s, _ in pairs}
-        result = stripe[~stripe['idx_stripe'].isin(used)]
+        result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r for _, r in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
+    elif table_name == 'export':
+        used = {s for s, _ in pairs}
+        stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]        
+        result = pd.concat([combined, stripe_only, stripe_adjustment, stripe_stripeFee], ignore_index=True)
+        # add the Stripe fees to the end of the table
+        new_rows = [
+            {'norm_zweck': 'Buchungsgebühren', 'norm_amount': total_stripe_charge_fees * (-1)},
+            {'norm_zweck': 'Rückbuchungsgebühren', 'norm_amount': total_stripe_refund_fees * (-1)},
+            {'norm_zweck': 'Korrekturgebühren', 'norm_amount': total_stripe_adjustment_fees * (-1)},
+            {'norm_zweck': 'Stripe Gebühren', 'norm_amount': total_stripe_stripeFee_fees * (-1)}
+        ]
+        new_rows_df = pd.DataFrame(new_rows)
+        result = pd.concat([result, new_rows_df], ignore_index=True)
+        
+        # fix empty name values
+        for i, row in result.iterrows():
+            if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
+                result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()
+
+        # fix empty email values
+        for i, row in result.iterrows():
+            if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
+                result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()
+        
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

@ -220,7 +358,8 @@ def download():
            'raisenow_import',
            'merged',
            'stripe_only',
-            'raisenow_only'
+            'raisenow_only',
+            'export'
        ]
    }

@ -259,5 +398,99 @@ def clear_session():
    session.clear()
    return jsonify({'status': 'session cleared'})

+def export_to_special_format(
+    df: pd.DataFrame,
+    reference: str,
+    account: str,
+    statement_number: int,
+    opening_date: datetime,
+    opening_balance: float,
+    currency: str,
+    closing_date: datetime = None,
+    closing_balance: float = None
+) -> str:
+    """
+    Convert a DataFrame of transactions into a SWIFT MT940-style statement text (tags :20:, :25:, :28C:, :60F:, :61:, :86:, :62F:).
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Must contain columns:
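+        - 'value_date' (datetime), written to :61: as YYMMDD
+        - 'booking_date' (datetime), written to :61: as MMDD
+        - 'dc' (str): 'C' for credit, 'D' for debit
+        - 'amount' (float), written with two decimals and a decimal comma
+        - optional 'transaction_code' (str), appended to :61: ('' if the column is absent)
+        - optional 'bank_reference' (str), appended to :61: ('' if the column is absent)
+        - 'narrative' (str), written as the :86: line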
+    reference : str
+        Message reference for :20:
+    account : str
+        Account number for :25:
+    statement_number : int
+        Statement sequence for :28C: (will be zero-padded to 5 digits)
+    opening_date : datetime
+        Opening balance date
+    opening_balance : float
+        Opening balance amount (positive); always written to :60F: as a credit ('C') balance
+    currency : str
+        Three-letter currency code (e.g. 'EUR')
+    closing_date : datetime, optional
+        Closing balance date; the :62F: line is written only when both closing_date and closing_balance are given
+    closing_balance : float, optional
+        Closing balance amount (positive); always written to :62F: as a credit ('C') balance
+
+    Returns
+    -------
+    str
+        The formatted file content.
+    """
+    lines = []
+    # Header
+    lines.append(f":20:{reference}")
+    lines.append(f":25:{account}")
+    lines.append(f":28C:{statement_number:05d}")
+
+    # Opening balance :60F:
+    od = opening_date.strftime('%y%m%d')
+    ob = f"{opening_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+    lines.append(f":60F:C{od}{currency}{ob}")
+
+    # Transactions
+    for _, row in df.iterrows():
+        vd = row['value_date'].strftime('%y%m%d')
+        bd = row['booking_date'].strftime('%m%d')
+        dc  = row['dc']
+        amt = f"{row['amount']:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+        tcode = row.get('transaction_code', '')
+        bref  = row.get('bank_reference', '')
+        lines.append(f":61:{vd}{bd}{dc}{amt}{tcode}{bref}")
+        lines.append(f":86:{row['narrative']}")
+
+    # Closing balance :62F:
+    if closing_date and closing_balance is not None:
+        cd = closing_date.strftime('%y%m%d')
+        cb = f"{closing_balance:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')
+        lines.append(f":62F:C{cd}{currency}{cb}")
+
+    return "\n".join(lines)
+
+# Example usage:
+# df = pd.DataFrame([...])
+# content = export_to_special_format(
+#     df,
+#     reference='REFEXCELEXPORT',
+#     account='11223344/55667788',
+#     statement_number=0,
+#     opening_date=datetime(2025,3,6),
+#     opening_balance=0.00,
+#     currency='EUR',
+#     closing_date=datetime(2025,3,6),
+#     closing_balance=12048.71
+# )
+# with open('statement.txt', 'w') as f:
+#     f.write(content)
+
+
 if __name__ == '__main__':
    app.run(debug=True)
--- a/app/templates/index.html
+++ b/app/templates/index.html
@ -49,6 +49,7 @@
          <option value="merged">Merged</option>
          <option value="stripe_only">Stripe Only</option>
          <option value="raisenow_only">RaiseNow Only</option>
+          <option value="export">Export</option>
        </select>
      </div>
    </div>