import os
from datetime import datetime
from io import BytesIO

import numpy as np
import pandas as pd
from flask import Flask, request, jsonify, render_template, send_file, session
from flask_session import Session

app = Flask(__name__)
# Prefer a key from the environment; the hard-coded literal is only a
# development fallback and should never be used in production.
app.secret_key = os.environ.get(
    'SECRET_KEY',
    'gfbierpf934hftrntr45otgß45890tfh34gft45rw'
)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)

# Leading columns used to recognize which source an uploaded file came from.
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount',
                        'Currency', 'Converted Amount']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset',
                          'Status', 'Betrag', 'Währung']


def get_dataframe(key):
    """Load the DataFrame stored in the session under ``key``.

    Returns an empty DataFrame when nothing has been stored yet.
    """
    records = session.get(key, [])
    return pd.DataFrame(records) if records else pd.DataFrame()


def get_merged_df(table_name):
    """Build the DataFrame view identified by ``table_name``.

    Views
    -----
    - ``'stripe_import'`` / ``'raisenow_import'``: the raw uploads.
    - ``'merged'``: Stripe charges matched one-to-one against Raisenow rows
      (exact same-day matches first, then ±1-day fuzzy matches, then
      singleton (amount, day) pairs); no pandas merge suffixes appear and
      all original columns (including Raisenow's ``norm_zweck``) survive.
    - ``'stripe_only'`` / ``'raisenow_only'``: unmatched leftovers per side.
    - ``'export'``: merged rows + unmatched Stripe charges + adjustment and
      Stripe-fee bookings + aggregated fee rows, renamed and reduced to the
      accounting export columns.

    Raises
    ------
    ValueError
        If ``table_name`` is not one of the known views.
    """
    print('calculate DataFrame...')

    # --- raw views are served without any processing, so they work even
    # --- when only one of the two sources has been uploaded so far
    stripe_import = get_dataframe('stripe_import')
    raisenow_import = get_dataframe('raisenow_import')
    if table_name == 'stripe_import':
        return stripe_import.dropna(axis=1, how='all')
    if table_name == 'raisenow_import':
        return raisenow_import.dropna(axis=1, how='all')

    # Merged views need both sources; .query() on a column-less frame would
    # raise, so bail out early with an empty result instead.
    if stripe_import.empty or raisenow_import.empty:
        return pd.DataFrame()

    # --- load & normalize Stripe, split by transaction type ---
    stripe_charge = stripe_import.query("Type == 'Charge'").copy()
    stripe_adjustment = stripe_import.query("Type == 'Adjustment'").copy()
    stripe_refund = stripe_import.query("Type == 'Payment Failure Refund'").copy()
    stripe_stripeFee = stripe_import.query("Type == 'Stripe Fee'").copy()

    def _fee_total(frame):
        # Fee values arrive as strings with a decimal comma ("1,23").
        return frame['Fees'].astype(str).str.replace(',', '.').astype(float).sum()

    total_stripe_charge_fees = _fee_total(stripe_charge)
    total_stripe_adjustment_fees = _fee_total(stripe_adjustment)
    total_stripe_refund_fees = _fee_total(stripe_refund)
    total_stripe_stripeFee_fees = _fee_total(stripe_stripeFee)

    def _normalize_internal(frame, zweck, name):
        # Internal Stripe bookings get fixed purpose/name labels.
        frame['norm_date'] = pd.to_datetime(frame['Created'], format='%Y-%m-%d %H:%M')
        frame['norm_amount'] = frame['Amount'].astype(str).str.replace(',', '.').astype(float)
        frame['norm_zweck'] = zweck
        frame['norm_name'] = name
        frame['norm_currency'] = frame['Currency'].astype(str).str.upper()

    _normalize_internal(stripe_adjustment, "Korrekturen", "Verrechnung Korrekturen")
    _normalize_internal(stripe_stripeFee, "Stripe", "Verrechnung Stripe")

    # Charges later refunded ("Payment Failure Refund" references a "py_..."
    # payment id in its description) are excluded from matching entirely.
    stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
    pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
    stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]

    stripe_charge['idx_stripe'] = stripe_charge.index
    stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M')
    stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float)
    stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str)
    stripe_charge['norm_currency'] = stripe_charge['Currency'].astype(str).str.upper()
    stripe_charge['norm_name'] = stripe_charge.apply(
        lambda r: r['Customer Name'] or r['Details'], axis=1
    )

    # --- load & normalize Raisenow ---
    raisenow = (
        raisenow_import
        .query("Zahlungsmethode != 'paypal'")
        .query("Status == 'succeeded'")
        .copy()
    )
    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
    raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
    raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
    raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)
    raisenow['norm_currency'] = raisenow['Währung'].astype(str).str.upper()
    # Two-step purpose assignment: action name first, then the custom code.
    raisenow['norm_zweck'] = raisenow.apply(
        lambda r: r.get('custom_parameters.altruja_action_name')
        or r.get('custom_parameters.altruja_custom1_code'),
        axis=1
    )
    # Fall back to the source URL's first path segment where both are empty.
    mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
    if mask.any() and 'raisenow_parameters.product.source_url' in raisenow.columns:
        raisenow.loc[mask, 'norm_zweck'] = (
            raisenow.loc[mask, 'raisenow_parameters.product.source_url']
            .str.extract(r'https?://[^/]+/([^/?#]+)')[0]
        )

    def _candidates(pool, s):
        # Candidate Raisenow rows: same amount AND (same name OR same e-mail).
        return pool[
            (pool['norm_amount'] == s['norm_amount'])
            & ((pool['norm_name'] == s['norm_name'])
               | (pool['norm_email'] == s['norm_email']))
        ].copy()

    pairs = []
    # Index Raisenow rows for fast lookup + dropping once matched.
    rr = raisenow.set_index('idx_raisenow')

    # --- 1) greedy exact same-day matches ---
    for _, s in stripe_charge.iterrows():
        cand = _candidates(rr, s)
        if cand.empty:
            continue
        # Absolute difference in whole days.
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        exact_cand = cand[date_diff == pd.Timedelta(0)]
        if not exact_cand.empty:
            best = exact_cand.index[0]
            pairs.append((int(s['idx_stripe']), int(best)))
            rr = rr.drop(best)

    # --- 2) greedy fuzzy ±1-day matches on the remaining rows ---
    used_stripe = {s_idx for s_idx, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = _candidates(rr, s)
        if cand.empty:
            continue
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        if cand[date_diff <= pd.Timedelta(days=1)].empty:
            continue
        # Smallest gap wins; it is guaranteed <= 1 day because at least one
        # candidate passed the tolerance filter above.
        best = date_diff.idxmin()
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

    # --- 3) singleton (amount, day) matches on whatever is still unmatched ---
    used_stripe = {s_idx for s_idx, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
    rr_df = rr.reset_index()
    rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()
    stripe_counts = (
        stripe_left
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='stripe_count')
    )
    rr_counts = (
        rr_df
        .groupby(['norm_amount', 'norm_date_norm'])
        .size()
        .reset_index(name='rr_count')
    )
    # Keys where exactly one row exists on each side can be paired safely.
    unique_keys = pd.merge(stripe_counts, rr_counts, on=['norm_amount', 'norm_date_norm'])
    unique_keys = unique_keys[
        (unique_keys['stripe_count'] == 1) & (unique_keys['rr_count'] == 1)
    ]
    for _, u in unique_keys.iterrows():
        amt = u['norm_amount']
        day = u['norm_date_norm']
        srow = stripe_left[
            (stripe_left['norm_amount'] == amt)
            & (stripe_left['norm_date_norm'] == day)
        ].iloc[0]
        rrow = rr_df[
            (rr_df['norm_amount'] == amt)
            & (rr_df['norm_date_norm'] == day)
        ].iloc[0]
        pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
        rr = rr.drop(rrow['idx_raisenow'])

    # --- build the merged DataFrame ---
    merged_rows = []
    for s_idx, r_idx in pairs:
        srow = stripe_charge.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # Drop overlapping keys so Stripe values win and no suffixes appear.
        for k in ['norm_name', 'norm_date', 'norm_email', 'norm_amount',
                  'norm_currency', 'idx_stripe']:
            rrow.pop(k, None)
        merged_rows.append({**srow, **rrow})

    starting_columns = ['norm_name', 'norm_date', 'norm_email',
                        'norm_amount', 'norm_currency', 'norm_zweck']
    if merged_rows:
        combined = pd.DataFrame(merged_rows)
        # Reorder so the most important columns come first.
        combined = pd.concat(
            [combined[starting_columns], combined.drop(columns=starting_columns)],
            axis=1
        )
    else:
        # No matches at all: keep the expected columns so downstream views
        # (and the column reorder above) stay valid instead of raising.
        combined = pd.DataFrame(columns=starting_columns)

    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s_idx for s_idx, _ in pairs}
        result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r_idx for _, r_idx in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
    elif table_name == 'export':
        used = {s_idx for s_idx, _ in pairs}
        stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
        result = pd.concat(
            [combined, stripe_only, stripe_adjustment, stripe_stripeFee],
            ignore_index=True
        )

        # Book the aggregated fees on the last day of the dataset's month,
        # one second before midnight.
        latest_date = pd.to_datetime(result['norm_date']).max()
        total_timestamp = (
            pd.Timestamp(year=latest_date.year, month=latest_date.month, day=1)
            + pd.offsets.MonthEnd(0)
        )
        total_timestamp = total_timestamp.replace(hour=23, minute=59, second=59, microsecond=0)
        fee_rows = [
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Buchungsgebühren',
             'norm_amount': total_stripe_charge_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Rückbuchungsgebühren',
             'norm_amount': total_stripe_refund_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Korrekturgebühren',
             'norm_amount': total_stripe_adjustment_fees * (-1)},
            {'norm_name': 'Verrechnung Gebühren', 'norm_currency': 'EUR',
             'norm_date': total_timestamp, 'norm_zweck': 'Stripe Gebühren',
             'norm_amount': total_stripe_stripeFee_fees * (-1)},
        ]
        result = pd.concat([result, pd.DataFrame(fee_rows)], ignore_index=True)

        # Backfill missing names from the Raisenow first/last name columns.
        for i, row in result.iterrows():
            if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
                result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()
        # Backfill missing e-mail addresses likewise.
        for i, row in result.iterrows():
            if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
                result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()

        result = result.rename(columns={
            'norm_name': 'name',
            'norm_date': 'booking_date',
            'norm_email': 'email',
            'norm_amount': 'amount',
            'norm_currency': 'currency',
            'norm_zweck': 'reference'
        })
        # Credit/debit marker derived from the signed amount.
        result['sign'] = result['amount'].apply(lambda x: 'CRDT' if x >= 0 else 'DBIT')
        result = result[[
            'booking_date', 'name', 'email', 'amount',
            'currency', 'sign', 'reference'
        ]]
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    return result.dropna(axis=1, how='all')


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Ingest one or more Stripe/Raisenow CSV or Excel exports.

    Each file is recognized by its leading columns, appended to the matching
    session-stored table, and de-duplicated by its primary-id column.
    """
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400
    for f in files:
        print('uploading file:', f.filename)
        raw = (
            pd.read_csv(f)
            if f.filename.lower().endswith('.csv')
            else pd.read_excel(f)
        )
        raw = raw.dropna(how='all')
        raw = raw.astype(object).replace({np.nan: None})
        cols = list(raw.columns)
        if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
            key = 'stripe_import'
            dedupe_col = 'ID'
        elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS:
            key = 'raisenow_import'
            dedupe_col = 'Identifikationsnummer'
        else:
            print('file does not match expected formats:', f.filename)
            return jsonify({
                "status": "error",
                "message": f"File '{f.filename}' does not match expected formats."
            }), 400
        existing = get_dataframe(key)
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)
        # Save back to the session as plain records (NaN -> None for JSON).
        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')
    return jsonify({'status': 'ok'})


@app.route('/get_table')
def get_table():
    """Return the requested table view as JSON columns + records."""
    table = request.args.get('table')
    print('get_table:', table)
    try:
        df = get_merged_df(table)
    except ValueError as exc:
        # An unknown view name is a client error, not a server crash.
        return jsonify({'status': 'error', 'message': str(exc)}), 400
    print('number of rows:', len(df))
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records')
    })


@app.route('/download_xlsx')
def download_xlsx():
    """Export every table view as one XLSX workbook (one sheet per view)."""
    sheets = {
        name: get_merged_df(name)
        for name in [
            'stripe_import', 'raisenow_import',
            'merged', 'stripe_only', 'raisenow_only', 'export'
        ]
    }
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name, df in sheets.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]
            # 1) Freeze the header row.
            worksheet.freeze_panes(1, 0)
            # 2) Autofilter across all columns (needs at least one column).
            if df.shape[1] > 0:
                worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
            # 3) Size columns from the first data row (skip empty sheets).
            if not df.empty:
                first_row = df.iloc[0].astype(str)
                for col_idx, cell_value in enumerate(first_row):
                    worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )


@app.route('/download_mt940')
def download_mt940():
    """Export the reconciled data as an MT940 bank-statement text file."""
    df = get_merged_df('export')
    mt940_string = generate_mt940(
        df,
        account_iban="11223344/55667788",
        transaction_ref="REFEXCELEXPORT",
        statement_number="00000",
        opening_balance=0.0,
        txn_code="NMSC",
        txn_ref="NONREF",
        info_prefix="169?00RAISENOW??20"
    )
    output = BytesIO()
    output.write(mt940_string.encode('utf-8'))
    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='export_mt940.txt',
        mimetype='text/plain'
    )


@app.route('/clear_session', methods=['POST'])
def clear_session():
    """Clear all session data and reset server-side stored DataFrames."""
    session.clear()
    return jsonify({'status': 'session cleared'})


def wrap_string(text: str, max_length: int = 65) -> str:
    """Hard-wrap ``text`` at ``max_length`` characters with CRLF breaks.

    Existing line breaks are preserved: each line is wrapped separately.
    """
    wrapped_lines = []
    # Split on any existing newline style ("\n", "\r\n", ...).
    for line in text.splitlines():
        # Chop each line into max_length-sized pieces.
        for i in range(0, len(line), max_length):
            wrapped_lines.append(line[i:i + max_length])
    # Re-join with Windows-style breaks, as MT940 expects.
    return "\r\n".join(wrapped_lines)


def generate_mt940(df: pd.DataFrame,
                   account_iban: str,
                   transaction_ref: str,
                   statement_number: str = None,
                   opening_balance: float = 0.0,
                   txn_code: str = 'NMSC',
                   txn_ref: str = 'NONREF',
                   info_prefix: str = None) -> str:
    """Generate an MT940 text statement from an export DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Required columns: 'booking_date' (datetime or 'YYYY-MM-DD' string),
        'amount' (float), 'currency' (str, e.g. 'EUR'), 'sign' ('CRDT' or
        'DBIT'), 'reference' (goes into the info_prefix section of tag 86)
        and 'name' (appended after "?32" in tag 86).
    account_iban : str
        Written verbatim into tag 25 (e.g. "11223344/55667788").
    transaction_ref : str
        Tag 20 (e.g. "REFEXCELEXPORT").
    statement_number : str, optional
        Used verbatim for tag 28C; falls back to "{transaction_ref}/1".
    opening_balance : float, optional
        Starting balance for tag 60F.
    txn_code : str, optional
        The booking code in the :61: line (default "NMSC").
    txn_ref : str, optional
        The literal reference after that code (default "NONREF").
    info_prefix : str, optional
        If set (e.g. "169?00RAISENOW??20"), tag 86 becomes
        "{prefix}{reference} ?32{name}"; otherwise a simple space-join of
        name and reference is used.

    Returns
    -------
    str
        The full statement with CRLF line endings, wrapped at 65 chars.

    Raises
    ------
    ValueError
        If ``df`` contains no rows (an MT940 needs at least one booking).
    """
    # Normalize & sort chronologically.
    df2 = df.copy()
    if df2.empty:
        raise ValueError('generate_mt940 requires at least one transaction row')
    df2['booking_date'] = pd.to_datetime(df2['booking_date'])
    df2.sort_values('booking_date', inplace=True)

    # The statement assumes a single constant currency.
    currency = df2['currency'].iat[0]

    lines = []
    # Header.
    lines.append(f":20:{transaction_ref}")
    lines.append(f":25:{account_iban}")
    if statement_number is None:
        lines.append(f":28C:{transaction_ref}/1")
    else:
        lines.append(f":28C:{statement_number}")

    # Opening balance (tag 60F), amount with decimal comma.
    first_dt = df2['booking_date'].iat[0]
    ob_sign = 'C' if opening_balance >= 0 else 'D'
    ob_str = f"{abs(opening_balance):.2f}".replace('.', ',')
    lines.append(f":60F:{ob_sign}{first_dt.strftime('%y%m%d')}{currency}{ob_str}")

    # Transactions.
    for _, row in df2.iterrows():
        bd = row['booking_date']
        sign = 'C' if row['sign'] == 'CRDT' else 'D'
        amt_str = f"{abs(row['amount']):.2f}".replace('.', ',')
        # :61:YYMMDD MMDD [C|D] amount code reference
        lines.append(
            f":61:{bd.strftime('%y%m%d')}"
            f"{bd.strftime('%m%d')}"
            f"{sign}{amt_str}"
            f"{txn_code}{txn_ref}"
        )
        # :86: either the structured prefix form or a simple fallback.
        raw_rem = row.get('reference', '')
        raw_name = row.get('name', '')
        rem = '' if pd.isna(raw_rem) else str(raw_rem)
        name = '' if pd.isna(raw_name) else str(raw_name)
        if info_prefix:
            lines.append(f":86:{info_prefix}{rem} ?32{name}")
        else:
            info = " ".join(filter(None, [name, rem]))
            lines.append(f":86:{info}")

    # Closing balance: credits add, debits subtract. Use magnitudes so a
    # signed 'amount' column cannot be double-counted through the 'sign'
    # column (the export stores negative amounts for DBIT rows).
    net_mv = sum(
        abs(row['amount']) if row['sign'] == 'CRDT' else -abs(row['amount'])
        for _, row in df2.iterrows()
    )
    closing = opening_balance + net_mv
    cb_sign = 'C' if closing >= 0 else 'D'
    cb_str = f"{abs(closing):.2f}".replace('.', ',')
    last_dt = df2['booking_date'].iat[-1]
    lines.append(f":62F:{cb_sign}{last_dt.strftime('%y%m%d')}{currency}{cb_str}")

    file_str = "\r\n".join(lines)
    return wrap_string(file_str)


if __name__ == '__main__':
    # Development server only; use a WSGI server in production.
    app.run(debug=True)