from flask import Flask, request, jsonify, render_template, send_file, session
import pandas as pd
import numpy as np
from io import BytesIO
from flask_session import Session

app = Flask(__name__)
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw"  # replace with a secure random key
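# A sturdier setup (a sketch, not wired in here; the 'SECRET_KEY' variable name
# is an assumption) would read the key from the environment and fall back to a
# freshly generated one:
#   import os, secrets
#   app.secret_key = os.environ.get('SECRET_KEY') or secrets.token_hex(32)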
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
# server-side sessions: the uploaded tables are far too large for Flask's
# default cookie-based session, so they are written to disk instead
Session(app)

# Leading header columns used to recognise which export an uploaded file is.
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']


def get_dataframe(key):
    """
    Load a DataFrame from the session store, or an empty DataFrame if nothing
    has been uploaded under this key yet.
    """
    records = session.get(key, [])
    return pd.DataFrame(records) if records else pd.DataFrame()


def get_merged_df(table_name):
    """
    Return a DataFrame for the given table_name based on the Stripe and
    Raisenow imports, enforcing strict one-to-one matching with:
    - exact same-day matches first
    - then ±1-day fuzzy matches
    - no pandas merge suffixes at all
    - all original columns (including Raisenow's norm_zweck) preserved
    """

    # --- load & normalize Stripe ---
    stripe = get_dataframe('stripe_import')
    if not stripe.empty:
        stripe = stripe.query("Type == 'Charge'").copy()
        stripe['idx_stripe'] = stripe.index
        stripe['norm_date'] = pd.to_datetime(stripe['Created'], format='%Y-%m-%d %H:%M')
        # amounts may use a decimal comma, so normalize before casting
        stripe['norm_amount'] = stripe['Amount'].astype(str).str.replace(',', '.').astype(float)
        stripe['norm_email'] = stripe['Customer Email'].astype(str)
        # missing session values come back as None (not NaN), so `or` falls
        # through to the Details column when the customer name is absent
        stripe['norm_name'] = stripe.apply(
            lambda r: r['Customer Name'] or r['Details'], axis=1
        )
    if table_name == 'stripe_import':
        return stripe.dropna(axis=1, how='all')

# --- load & normalize Raisenow ---
|
||
raisenow = get_dataframe('raisenow_import')
|
||
if not raisenow.empty:
|
||
raisenow = (
|
||
raisenow
|
||
.query("Zahlungsmethode != 'paypal'")
|
||
.query("Status == 'succeeded'")
|
||
.copy()
|
||
)
|
||
else:
|
||
return raisenow
|
||
|
||
raisenow['idx_raisenow'] = raisenow.index
|
||
raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
|
||
raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
|
||
raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
|
||
raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)
|
||
|
||
# start with two‐step assignment
|
||
raisenow['norm_zweck'] = raisenow.apply(
|
||
lambda r: r.get('custom_parameters.altruja_action_name')
|
||
or r.get('custom_parameters.altruja_custom1_code'),
|
||
axis=1
|
||
)
|
||
# additional assignment: build a mask of rows where norm_zweck is still empty/NaN
|
||
mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
|
||
raisenow.loc[mask, 'norm_zweck'] = (
|
||
raisenow.loc[mask, 'raisenow_parameters.product.source_url']
|
||
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
|
||
)
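        # e.g. (hypothetical URL) 'https://spenden.example.org/winterhilfe?ref=nl'
        # yields norm_zweck == 'winterhilfe'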
    if table_name == 'raisenow_import':
        return raisenow.dropna(axis=1, how='all')

    # --- matched views need both sources; degrade gracefully otherwise ---
    if stripe.empty or raisenow.empty:
        if table_name == 'stripe_only':
            return stripe.dropna(axis=1, how='all')
        if table_name == 'raisenow_only':
            return raisenow.dropna(axis=1, how='all')
        return pd.DataFrame()  # nothing to pair for 'merged'

    # --- 1) Greedy exact same-day matches ---
    pairs = []
    # index Raisenow rows for fast lookup + dropping
    rr = raisenow.set_index('idx_raisenow')
    for _, s in stripe.iterrows():
        # filter candidates by amount & name
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
            (rr['norm_name'] == s['norm_name'])
        ].copy()
        if cand.empty:
            continue
        # absolute date difference at day granularity
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        exact_cand = cand[date_diff == pd.Timedelta(0)]
        if not exact_cand.empty:
            # pick the first exact match and retire it from the pool
            best = exact_cand.index[0]
            pairs.append((int(s['idx_stripe']), int(best)))
            rr = rr.drop(best)

    # --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
    used_stripe = {s for s, _ in pairs}
    stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy()
    for _, s in stripe_left.iterrows():
        cand = rr[
            (rr['norm_amount'] == s['norm_amount']) &
            (rr['norm_name'] == s['norm_name'])
        ].copy()
        if cand.empty:
            continue
        # keep only candidates within ±1 day, then take the smallest gap
        date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
        date_diff = date_diff[date_diff <= pd.Timedelta(days=1)]
        if date_diff.empty:
            continue
        best = date_diff.idxmin()
        pairs.append((int(s['idx_stripe']), int(best)))
        rr = rr.drop(best)

    # --- build the merged DataFrame without suffixes ---
    merged_rows = []
    for s_idx, r_idx in pairs:
        srow = stripe.loc[s_idx].to_dict()
        rrow = raisenow.loc[r_idx].to_dict()
        # drop the shared norm_* helper keys from the Raisenow row so the
        # Stripe values survive the dict merge below
        for k in ['norm_amount', 'norm_name', 'norm_date', 'norm_email']:
            rrow.pop(k, None)
        # Stripe columns first; Raisenow contributes its remaining columns
        merged = {**srow, **rrow}
        merged_rows.append(merged)

    combined = pd.DataFrame(merged_rows)

    # --- slice out the requested view ---
    if table_name == 'merged':
        result = combined
    elif table_name == 'stripe_only':
        used = {s for s, _ in pairs}
        result = stripe[~stripe['idx_stripe'].isin(used)]
    elif table_name == 'raisenow_only':
        used = {r for _, r in pairs}
        result = raisenow[~raisenow['idx_raisenow'].isin(used)]
    else:
        raise ValueError(f"Unknown table_name '{table_name}'")

    return result.dropna(axis=1, how='all')


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'No files uploaded'}), 400

    for f in files:
        # CSV or Excel, decided by file extension
        raw = (
            pd.read_csv(f) if f.filename.lower().endswith('.csv') else pd.read_excel(f)
        )
        raw = raw.dropna(how='all').dropna(axis=1, how='all')
        raw = raw.astype(object).replace({np.nan: None})
        cols = list(raw.columns)
        # sniff the export type from the leading header columns
        if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
            key = 'stripe_import'
            dedupe_col = 'ID'
        elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS:
            key = 'raisenow_import'
            dedupe_col = 'Identifikationsnummer'
        else:
            # silently skip files that match neither export format
            continue

        # append to whatever is already stored, dropping re-uploaded rows
        existing = get_dataframe(key)
        combined = pd.concat([existing, raw], ignore_index=True)
        deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)

        # save back to session as plain records (JSON-friendly, NaN -> None)
        session[key] = deduped.astype(object).where(pd.notnull(deduped), None).to_dict(orient='records')

    return jsonify({'status': 'ok'})
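
# Example upload (hypothetical filenames); both exports can go in one request:
#   curl -F 'files=@stripe_export.csv' -F 'files=@raisenow_export.xlsx' \
#        http://localhost:5000/upload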


@app.route('/get_table')
def get_table():
    table = request.args.get('table')
    try:
        df = get_merged_df(table)
    except ValueError:
        return jsonify({'error': f"Unknown table '{table}'"}), 400

    # JSON cannot carry NaN, so swap it for None (-> null)
    df = df.astype(object).where(pd.notnull(df), None)
    return jsonify({
        'columns': list(df.columns),
        'data': df.to_dict(orient='records')
    })
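
# Example response (hypothetical data) for /get_table?table=merged:
#   {"columns": ["Type", "ID", ...], "data": [{"Type": "Charge", ...}, ...]}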


@app.route('/download')
def download():
    sheets = {
        name: get_merged_df(name)
        for name in [
            'stripe_import',
            'raisenow_import',
            'merged',
            'stripe_only',
            'raisenow_only'
        ]
    }

    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for name, df in sheets.items():
            df.to_excel(writer, sheet_name=name, index=False)
            worksheet = writer.sheets[name]

            # 1) Freeze the header row
            worksheet.freeze_panes(1, 0)

            if df.empty:
                continue  # nothing to filter or size on an empty sheet

            # 2) Autofilter on the header row across all columns;
            #    (0, 0) is the top-left cell, row len(df) is the last data row
            worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)

            # 3) Rough column widths, taken from the first data row
            first_row = df.iloc[0].astype(str)
            for col_idx, cell_value in enumerate(first_row):
                worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)

    output.seek(0)
    return send_file(
        output,
        as_attachment=True,
        download_name='all_tables.xlsx',
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )


@app.route('/clear_session', methods=['POST'])
def clear_session():
    """
    Clear all session data and reset the server-side stored DataFrames.
    """
    session.clear()
    return jsonify({'status': 'session cleared'})


if __name__ == '__main__':
    app.run(debug=True)
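
# To run locally (the file name app.py is an assumption):
#   pip install flask Flask-Session pandas numpy XlsxWriter openpyxl
#   python app.py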