497 lines
18 KiB
Python
497 lines
18 KiB
Python
from flask import Flask, request, jsonify, render_template, send_file, session
|
||
import pandas as pd
|
||
import numpy as np
|
||
from io import BytesIO
|
||
from flask_session import Session
|
||
from datetime import datetime
|
||
|
||
# --- Flask application setup ---
app = Flask(__name__)

# NOTE(review): hard-coded secret key checked into source control —
# load it from an environment variable or config file before deploying.
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw"  # replace with a secure random key

# Server-side sessions: uploaded CSV/Excel data is stored in files on
# disk instead of the (size-limited, client-visible) session cookie.
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)


# Leading column headers used by upload() to recognise which provider an
# uploaded file came from (Stripe export vs. RaiseNow export, the latter
# with German headers).
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']
|
||
|
||
|
||
def get_dataframe(key):
    """
    Rebuild a pandas DataFrame from the list of records stored in the
    session under *key*.

    Returns an empty DataFrame when nothing (or an empty list) is stored.
    """
    stored = session.get(key, [])
    return pd.DataFrame(stored) if stored else pd.DataFrame()
|
||
|
||
|
||
def get_merged_df(table_name):
    """
    Return a DataFrame for the given table_name based on Stripe and Raisenow inputs,
    enforcing strict one-to-one matching with:
    - exact same-day matches first
    - then ±1-day fuzzy matches
    - no pandas merge suffixes at all
    - all original columns (including Raisenow's norm_zweck) preserved

    Supported table_name values:
    - 'stripe_import'   : raw Stripe upload (all-empty columns dropped)
    - 'raisenow_import' : raw Raisenow upload (all-empty columns dropped)
    - 'merged'          : one row per matched Stripe-charge/Raisenow pair
    - 'stripe_only'     : Stripe charges with no Raisenow counterpart
    - 'raisenow_only'   : Raisenow rows with no Stripe counterpart
    - 'export'          : merged + unmatched Stripe charges + adjustments,
                          Stripe-fee rows and summary fee rows appended

    Returns an empty DataFrame when no Stripe data has been uploaded.
    Raises ValueError for an unknown table_name.
    """

    # --- load & normalize Stripe ---
    stripe_import = get_dataframe('stripe_import')

    # nothing uploaded yet -> nothing to match against
    if stripe_import.empty:
        return pd.DataFrame()

    # split the Stripe export by transaction type; each subset is
    # treated differently below
    stripe_charge = (
        stripe_import
        .query("Type == 'Charge'")
        .copy()
    )

    stripe_adjustment = (
        stripe_import
        .query("Type == 'Adjustment'")
        .copy()
    )

    stripe_refund = (
        stripe_import
        .query("Type == 'Payment Failure Refund'")
        .copy()
    )

    stripe_stripeFee = (
        stripe_import
        .query("Type == 'Stripe Fee'")
        .copy()
    )

    # sum up the fees per type; 'Fees' uses ',' as the decimal mark,
    # so normalize to '.' before casting to float
    total_stripe_charge_fees = stripe_charge['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
    total_stripe_adjustment_fees = stripe_adjustment['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
    total_stripe_refund_fees = stripe_refund['Fees'].astype(str).str.replace(',', '.').astype(float).sum()
    total_stripe_stripeFee_fees = stripe_stripeFee['Fees'].astype(str).str.replace(',', '.').astype(float).sum()

    # normalized columns ('norm_*') give adjustments/fees the same shape
    # as matched rows so they can be concatenated into the export view
    stripe_adjustment['norm_date'] = pd.to_datetime(stripe_adjustment['Created'], format='%Y-%m-%d %H:%M')
    stripe_adjustment['norm_amount'] = stripe_adjustment['Amount'].astype(str).str.replace(',', '.').astype(float)
    stripe_adjustment['norm_zweck'] = "Korrekturen"

    stripe_stripeFee['norm_date'] = pd.to_datetime(stripe_stripeFee['Created'], format='%Y-%m-%d %H:%M')
    stripe_stripeFee['norm_amount'] = stripe_stripeFee['Amount'].astype(str).str.replace(',', '.').astype(float)
    stripe_stripeFee['norm_zweck'] = "Stripe"

    # Extract the "py_..." payment token from the refund description
    stripe_refund['norm_payment_id'] = stripe_refund['Description'].str.extract(r'(py_[A-Za-z0-9]+)')
    # Build a list of all extracted py_ IDs
    pyids = stripe_refund['norm_payment_id'].dropna().unique().tolist()
    # Remove from stripe_charge any row whose ID is in that list:
    # refunded payment failures must not take part in the matching
    stripe_charge = stripe_charge[~stripe_charge['ID'].isin(pyids)]

    # remember the original row index so matched pairs can be mapped
    # back to full rows at the end
    stripe_charge['idx_stripe'] = stripe_charge.index
    stripe_charge['norm_date'] = pd.to_datetime(stripe_charge['Created'], format='%Y-%m-%d %H:%M')
    stripe_charge['norm_amount'] = stripe_charge['Amount'].astype(str).str.replace(',', '.').astype(float)
    # NOTE(review): 'Customer Email' and 'Customer Name' are not part of
    # STRIPE_STARTING_COLS, so their presence is assumed rather than
    # checked — confirm against a real Stripe export.
    stripe_charge['norm_email'] = stripe_charge['Customer Email'].fillna('').astype(str)
    stripe_charge['norm_name'] = stripe_charge.apply(
        lambda r: r['Customer Name'] or r['Details'], axis=1
    )

    # --- load & normalize Raisenow ---
    # NOTE(review): if no Raisenow file was uploaded this DataFrame has
    # no columns and the .query calls below will raise — confirm whether
    # a Stripe-only session is a supported workflow.
    raisenow_import = get_dataframe('raisenow_import')
    raisenow = (
        raisenow_import
        .query("Zahlungsmethode != 'paypal'")
        .query("Status == 'succeeded'")
        .copy()
    )

    raisenow['idx_raisenow'] = raisenow.index
    raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
    raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
    raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
    raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)

    # purpose ('Zweck') assignment, step 1: prefer the Altruja action
    # name, fall back to the custom1 code
    raisenow['norm_zweck'] = raisenow.apply(
        lambda r: r.get('custom_parameters.altruja_action_name')
        or r.get('custom_parameters.altruja_custom1_code'),
        axis=1
    )
    # step 2: where norm_zweck is still empty/NaN, derive it from the
    # first path segment of the donation page URL
    mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
    raisenow.loc[mask, 'norm_zweck'] = (
        raisenow.loc[mask, 'raisenow_parameters.product.source_url']
        .str.extract(r'https?://[^/]+/([^/?#]+)')[0]
    )

    # --- return raw tables if requested ---
    if table_name == 'stripe_import':
        return stripe_import.dropna(axis=1, how='all')
    if table_name == 'raisenow_import':
        return raisenow_import.dropna(axis=1, how='all')

    # --- 1) Greedy exact same-day matches ---
    # pairs collects (stripe index, raisenow index) tuples; rr shrinks as
    # Raisenow rows get consumed, which enforces one-to-one matching
    pairs = []
    # index Raisenow rows for fast lookup + dropping
    rr = raisenow.set_index('idx_raisenow')
    for _, s in stripe_charge.iterrows():
        # filter candidates by amount & (name OR email)
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
            (
                (rr['norm_name'] == s['norm_name']) |
                (rr['norm_email'] == s['norm_email'])
            )
        ].copy()
        if cand.empty:
            continue
        # compute absolute date difference (days only, times ignored)
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        exact_cand = cand[date_diff == pd.Timedelta(0)]
        if not exact_cand.empty:
            # pick the first exact match and consume it
            best = exact_cand.index[0]
            pairs.append((int(s['idx_stripe']), int(best)))
            rr = rr.drop(best)

    # --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
    used_stripe = {s for s, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
            (
                (rr['norm_name'] == s['norm_name']) |
                (rr['norm_email'] == s['norm_email'])
            )
        ].copy()
        if cand.empty:
            continue
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        cand = cand[date_diff <= pd.Timedelta(days=1)]
        if cand.empty:
            continue
        # pick the one with the smallest gap
        # (idxmin runs over the unfiltered diff, but its minimum is
        # necessarily within the ±1-day window since cand is non-empty)
        best = date_diff.idxmin()
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

    # --- 3) Unique amount & exact-date-only matches ---
    # last resort: match by (amount, day) alone, but only when that key
    # is unique on BOTH sides, so no wrong pairing is possible
    # Recompute "leftovers" now after steps 1+2
    used_stripe = {s for s, _ in pairs}
    stripe_left = stripe_charge[~stripe_charge['idx_stripe'].isin(used_stripe)].copy()

    # Prep for grouping
    stripe_left['norm_date_norm'] = stripe_left['norm_date'].dt.normalize()
    rr_df = rr.reset_index()
    rr_df['norm_date_norm'] = rr_df['norm_date'].dt.normalize()

    # Count how many rows per (amount, date) key in each table
    stripe_counts = (
        stripe_left
        .groupby(['norm_amount','norm_date_norm'])
        .size()
        .reset_index(name='stripe_count')
    )
    rr_counts = (
        rr_df
        .groupby(['norm_amount','norm_date_norm'])
        .size()
        .reset_index(name='rr_count')
    )

    # Find the keys where both counts == 1
    unique_keys = pd.merge(stripe_counts, rr_counts,
                           on=['norm_amount','norm_date_norm'])
    unique_keys = unique_keys[
        (unique_keys['stripe_count'] == 1) &
        (unique_keys['rr_count'] == 1)
    ]

    # Pull those exact singletons through
    for _, u in unique_keys.iterrows():
        amt = u['norm_amount']
        d = u['norm_date_norm']
        srow = stripe_left[
            (stripe_left['norm_amount'] == amt) &
            (stripe_left['norm_date_norm'] == d)
        ].iloc[0]
        rrow = rr_df[
            (rr_df['norm_amount'] == amt) &
            (rr_df['norm_date_norm'] == d)
        ].iloc[0]
        pairs.append((int(srow['idx_stripe']), int(rrow['idx_raisenow'])))
        rr = rr.drop(rrow['idx_raisenow'])

    # --- build the merged DataFrame ---
    merged_rows = []
    for s_idx, r_idx in pairs:
        srow = stripe_charge.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # drop any overlapping keys so we never get suffixes
        for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']:
            rrow.pop(k, None)
        # now combine so stripe values win for those keys, and raisenow adds its own columns
        merged = {**srow, **rrow}
        merged_rows.append(merged)

    combined = pd.DataFrame(merged_rows)

    starting_columns = ['norm_name', 'norm_date', 'norm_email', 'norm_amount', 'norm_zweck']
    # reorder columns to put the most important ones first
    combined = pd.concat([
        combined[starting_columns],
        combined.drop(columns=starting_columns)
    ], axis=1)

    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s for s, _ in pairs}
        result = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r for _, r in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
    elif table_name == 'export':
        used = {s for s, _ in pairs}
        stripe_only = stripe_charge[~stripe_charge['idx_stripe'].isin(used)]
        result = pd.concat([combined, stripe_only, stripe_adjustment, stripe_stripeFee], ignore_index=True)

        # add the Stripe fee totals (sign-flipped: fees reduce the
        # balance) as synthetic rows at the end of the table
        new_rows = [
            {'norm_zweck': 'Buchungsgebühren', 'norm_amount': total_stripe_charge_fees * (-1)},
            {'norm_zweck': 'Rückbuchungsgebühren', 'norm_amount': total_stripe_refund_fees * (-1)},
            {'norm_zweck': 'Korrekturgebühren', 'norm_amount': total_stripe_adjustment_fees * (-1)},
            {'norm_zweck': 'Stripe Gebühren', 'norm_amount': total_stripe_stripeFee_fees * (-1)}
        ]
        new_rows_df = pd.DataFrame(new_rows)
        result = pd.concat([result, new_rows_df], ignore_index=True)

        # fix empty name values using the Raisenow name columns
        for i, row in result.iterrows():
            if pd.isna(row.get('norm_name')) and pd.notna(row.get('Vorname')) and pd.notna(row.get('Nachname')):
                result.at[i, 'norm_name'] = f"{row.get('Vorname')} {row.get('Nachname')}".strip()

        # fix empty email values using the Raisenow e-mail column
        for i, row in result.iterrows():
            if (pd.isna(row.get('norm_email')) or row.get('norm_email') == '') and pd.notna(row.get('E-Mail-Adresse')):
                result.at[i, 'norm_email'] = f"{row.get('E-Mail-Adresse')}".strip()

    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    # all-empty columns carry no information in any view
    return result.dropna(axis=1, how='all')
|
||
|
||
|
||
@app.route('/')
def index():
    """Serve the single-page front end."""
    return render_template('index.html')
|
||
|
||
|
||
@app.route('/upload', methods=['POST'])
def upload():
    """
    Accept one or more CSV/Excel exports, recognise their origin (Stripe
    or RaiseNow) by their leading column headers, de-duplicate against
    previously uploaded rows and persist the result in the session.

    Returns 400 when no files were sent; files with an unrecognised
    header layout are skipped silently.
    """
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400

    for f in files:
        # f.filename can be None/empty for malformed multipart parts;
        # guard against AttributeError on .lower() and fall back to the
        # Excel reader (same path as any non-.csv name).
        filename = (f.filename or '').lower()
        raw = (
            pd.read_csv(f) if filename.endswith('.csv') else pd.read_excel(f)
        )
        # drop fully empty rows/columns and normalise NaN -> None so the
        # records survive session serialisation cleanly
        raw = raw.dropna(how='all').dropna(axis=1, how='all')
        raw = raw.astype(object).replace({np.nan: None})
        cols = list(raw.columns)
        if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
            key = 'stripe_import'
            dedupe_col = 'ID'
        elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS:
            key = 'raisenow_import'
            dedupe_col = 'Identifikationsnummer'
        else:
            # unknown layout — skip this file rather than fail the batch
            continue

        # append to previously uploaded data, then drop duplicates by
        # the provider's unique transaction id (first occurrence wins)
        existing = get_dataframe(key)
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)

        # Save back to session (None instead of NaN keeps records JSON-safe)
        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')

    return jsonify({'status': 'ok'})
|
||
|
||
|
||
@app.route('/get_table')
def get_table():
    """
    Return the table selected by the 'table' query parameter as JSON:
    {'columns': [...], 'data': [...]}.

    An unknown table name now yields a 400 JSON error instead of letting
    get_merged_df's ValueError bubble up as an unhandled 500.
    """
    table = request.args.get('table')

    try:
        df = get_merged_df(table)
    except ValueError as e:
        # raised by get_merged_df for unknown table names
        return jsonify({'error': str(e)}), 400

    # NaN -> None so jsonify emits null instead of invalid JSON NaN
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records')
    })
|
||
|
||
|
||
@app.route('/download')
def download():
    """
    Build an in-memory Excel workbook with one sheet per table view and
    stream it to the client as an attachment ('all_tables.xlsx').
    """
    sheets = {
        name: get_merged_df(name)
        for name in [
            'stripe_import',
            'raisenow_import',
            'merged',
            'stripe_only',
            'raisenow_only',
            'export'
        ]
    }

    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name, df in sheets.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]

            # 1) Freeze header row
            worksheet.freeze_panes(1, 0)

            # Empty tables have no data rows/columns: df.iloc[0] would
            # raise IndexError and autofilter would receive a negative
            # last-column index — skip the decorations for them.
            if df.empty or df.shape[1] == 0:
                continue

            # 2) Autofilter on the header row across all columns
            # (0,0) is the top-left cell; (len(df), len(df.columns)-1) covers all data rows
            worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)

            # 3) Set column widths from the first data row, but never
            # narrower than the header text itself
            first_row = df.iloc[0].astype(str)
            for col_idx, cell_value in enumerate(first_row):
                width = max(len(cell_value), len(str(df.columns[col_idx]))) + 2
                worksheet.set_column(col_idx, col_idx, width)

    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )
|
||
|
||
@app.route('/clear_session', methods=['POST'])
def clear_session():
    """
    Clear all session data and reset server-side stored DataFrames.

    Removes the uploaded Stripe/RaiseNow records so the next upload
    starts from a clean slate.
    """
    session.clear()
    return jsonify({'status': 'session cleared'})
|
||
|
||
def export_to_special_format(
    df: pd.DataFrame,
    reference: str,
    account: str,
    statement_number: int,
    opening_date: datetime,
    opening_balance: float,
    currency: str,
    closing_date: datetime = None,
    closing_balance: float = None
) -> str:
    """
    Convert a DataFrame of transactions into the special SWIFT-like file format.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns:
        - 'value_date' (datetime)
        - 'booking_date' (datetime)
        - 'dc' (str): 'C' for credit, 'D' for debit
        - 'amount' (float)
        - optional 'transaction_code' (str)
        - optional 'bank_reference' (str)
        - 'narrative' (str)
    reference : str
        Message reference for :20:
    account : str
        Account number for :25:
    statement_number : int
        Statement sequence for :28C: (will be zero-padded to 5 digits)
    opening_date : datetime
        Opening balance date
    opening_balance : float
        Opening balance amount (positive)
    currency : str
        Three-letter currency code (e.g. 'EUR')
    closing_date : datetime, optional
        Closing balance date
    closing_balance : float, optional
        Closing balance amount (positive)

    Returns
    -------
    str
        The formatted file content.
    """

    def _fmt_amount(value: float) -> str:
        # SWIFT-style decimal: no thousands separator, comma as decimal
        # mark (e.g. 12048.71 -> '12048,71'); 'X' is a throwaway marker
        # so '.' and ',' can be swapped without colliding
        return f"{value:,.2f}".replace(',', 'X').replace('.', ',').replace('X', '')

    def _opt_field(row, key) -> str:
        # Optional column: a missing key OR a NaN cell becomes '' —
        # previously a NaN was rendered literally as 'nan' in the output.
        val = row.get(key, '')
        return '' if pd.isna(val) else str(val)

    lines = []
    # Header
    lines.append(f":20:{reference}")
    lines.append(f":25:{account}")
    lines.append(f":28C:{statement_number:05d}")

    # Opening balance :60F: (always emitted as credit 'C')
    od = opening_date.strftime('%y%m%d')
    lines.append(f":60F:C{od}{currency}{_fmt_amount(opening_balance)}")

    # Transactions: one :61: statement line + :86: narrative per row
    for _, row in df.iterrows():
        vd = row['value_date'].strftime('%y%m%d')
        bd = row['booking_date'].strftime('%m%d')
        dc = row['dc']
        amt = _fmt_amount(row['amount'])
        tcode = _opt_field(row, 'transaction_code')
        bref = _opt_field(row, 'bank_reference')
        lines.append(f":61:{vd}{bd}{dc}{amt}{tcode}{bref}")
        lines.append(f":86:{row['narrative']}")

    # Closing balance :62F: — only when both values are supplied
    if closing_date and closing_balance is not None:
        cd = closing_date.strftime('%y%m%d')
        lines.append(f":62F:C{cd}{currency}{_fmt_amount(closing_balance)}")

    return "\n".join(lines)
|
||
|
||
# Example usage:
|
||
# df = pd.DataFrame([...])
|
||
# content = export_to_special_format(
|
||
# df,
|
||
# reference='REFEXCELEXPORT',
|
||
# account='11223344/55667788',
|
||
# statement_number=0,
|
||
# opening_date=datetime(2025,3,6),
|
||
# opening_balance=0.00,
|
||
# currency='EUR',
|
||
# closing_date=datetime(2025,3,6),
|
||
# closing_balance=12048.71
|
||
# )
|
||
# with open('statement.txt', 'w') as f:
|
||
# f.write(content)
|
||
|
||
|
||
if __name__ == '__main__':
    # Development entry point only — run behind a proper WSGI server in
    # production and never leave debug=True enabled on a public host.
    app.run(debug=True)
|