diff --git a/app/app.py b/app/app.py index 3689ca0..75556f5 100644 --- a/app/app.py +++ b/app/app.py @@ -6,15 +6,12 @@ from flask_session import Session app = Flask(__name__) app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key -app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key app.config['SESSION_TYPE'] = 'filesystem' app.config['SESSION_FILE_DIR'] = './.flask_session/' Session(app) -STRIPE_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details'] -RAISENOW_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung'] -STRIPE_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details'] -RAISENOW_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung'] +STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details'] +RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung'] def get_dataframe(key): @@ -59,7 +56,7 @@ def get_merged_df(table_name): ) # --- load & normalize Raisenow --- - raisenow = get_dataframe('raiseNow_import') + raisenow = get_dataframe('raisenow_import') if not raisenow.empty: raisenow = ( raisenow @@ -92,45 +89,13 @@ def get_merged_df(table_name): # --- return raw tables if requested --- if table_name == 'stripe_import': return stripe.dropna(axis=1, how='all') - if table_name == 'raiseNow_import': - return raisenow.dropna(axis=1, how='all') - # additional assignment: build a mask of rows where norm_zweck is still empty/NaN - mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '') - raisenow.loc[mask, 'norm_zweck'] = ( - raisenow.loc[mask, 'raisenow_parameters.product.source_url'] - .str.extract(r'https?://[^/]+/([^/?#]+)')[0] - ) - - # --- return raw tables if requested --- - if table_name == 'stripe_import': - return stripe.dropna(axis=1, how='all') - if table_name == 'raiseNow_import': + if table_name == 'raisenow_import': return raisenow.dropna(axis=1, how='all') # --- 1) Greedy exact same-day matches --- pairs = [] # index Raisenow rows for fast lookup + dropping rr = raisenow.set_index('idx_raisenow') - for _, s in stripe.iterrows(): - # filter candidates by amount & name - cand = rr[ - (rr['norm_amount'] == s['norm_amount']) & - (rr['norm_name'] == s['norm_name']) - ].copy() - if cand.empty: - continue - # compute absolute date difference (days only) - date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs() - exact_cand = cand[date_diff == pd.Timedelta(0)] - if not exact_cand.empty: - # pick the first exact match - best = exact_cand.index[0] - pairs.append((int(s['idx_stripe']), int(best))) - rr = rr.drop(best) - # --- 1) Greedy exact same-day matches --- - pairs = [] - # index Raisenow rows for fast lookup + dropping - rr = raisenow.set_index('idx_raisenow') for _, s in stripe.iterrows(): # filter candidates by amount & name cand = rr[ @@ -181,40 +146,6 @@ def get_merged_df(table_name): combined = pd.DataFrame(merged_rows) - # --- slice out the requested view --- - # --- 2) Greedy fuzzy ±1-day matches on remaining rows --- - used_stripe = {s for s, _ in pairs} - stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy() - for _, s in stripe_left.iterrows(): - cand = rr[ - (rr['norm_amount'] == s['norm_amount']) & - (rr['norm_name'] == s['norm_name']) - ].copy() - if cand.empty: - continue - date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs() - cand = cand[date_diff <= pd.Timedelta(days=1)] - if cand.empty: - continue - # pick the one with the smallest gap - best = date_diff.idxmin() - pairs.append((int(s['idx_stripe']), int(best))) - rr = rr.drop(best) - - # --- build the merged DataFrame without suffixes --- - merged_rows = [] - for s_idx, r_idx in pairs: - srow = stripe.loc[s_idx].to_dict() - rrow = raisenow.loc[r_idx].to_dict() - # drop any overlapping keys so we never get suffixes - for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']: - rrow.pop(k, None) - # now combine so stripe values win for those keys, and raisenow adds its own columns - merged = {**srow, **rrow} - merged_rows.append(merged) - - combined = pd.DataFrame(merged_rows) - # --- slice out the requested view --- if table_name == 'merged': result = combined @@ -230,7 +161,6 @@ def get_merged_df(table_name): return result.dropna(axis=1, how='all') - @app.route('/') def index(): return render_template('index.html') @@ -249,11 +179,11 @@ def upload(): raw = raw.dropna(how='all').dropna(axis=1, how='all') raw = raw.astype(object).replace({np.nan: None}) cols = list(raw.columns) - if cols[:len(STRIPE_COLS)] == STRIPE_COLS: + if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS: key = 'stripe_import' dedupe_col = 'ID' - elif cols[:len(RAISENOW_COLS)] == RAISENOW_COLS: - key = 'raiseNow_import' + elif cols[:len(RAISENOW_STARTING_COLS)] == RAISENOW_STARTING_COLS: + key = 'raisenow_import' dedupe_col = 'Identifikationsnummer' else: continue @@ -287,17 +217,7 @@ def download(): name: get_merged_df(name) for name in [ 'stripe_import', - 'raiseNow_import', - 'merged', - 'stripe_only', - 'raisenow_only' - ] - } - sheets = { - name: get_merged_df(name) - for name in [ - 'stripe_import', - 'raiseNow_import', + 'raisenow_import', 'merged', 'stripe_only', 'raisenow_only' @@ -331,5 +251,13 @@ def download(): mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ) +@app.route('/clear_session', methods=['POST']) +def clear_session(): + """ + Clear all session data and reset server-side stored DataFrames. + """ + session.clear() + return jsonify({'status': 'session cleared'}) + if __name__ == '__main__': app.run(debug=True) diff --git a/app/templates/index.html b/app/templates/index.html index 91dcb8a..999ba42 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -8,7 +8,7 @@