Merge branch 'development' of gitea.centx.de:lelo/cdh-merger into development
This commit is contained in:
commit
01152c5f82
79
app/app.py
79
app/app.py
@@ -6,12 +6,15 @@ from flask_session import Session
|
||||
|
||||
app = Flask(__name__)
|
||||
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key
|
||||
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key
|
||||
app.config['SESSION_TYPE'] = 'filesystem'
|
||||
app.config['SESSION_FILE_DIR'] = './.flask_session/'
|
||||
Session(app)
|
||||
|
||||
STRIPE_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
|
||||
RAISENOW_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']
|
||||
STRIPE_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
|
||||
RAISENOW_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']
|
||||
|
||||
|
||||
def get_dataframe(key):
|
||||
@@ -86,6 +89,18 @@ def get_merged_df(table_name):
|
||||
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
|
||||
)
|
||||
|
||||
# --- return raw tables if requested ---
|
||||
if table_name == 'stripe_import':
|
||||
return stripe.dropna(axis=1, how='all')
|
||||
if table_name == 'raiseNow_import':
|
||||
return raisenow.dropna(axis=1, how='all')
|
||||
# additional assignment: build a mask of rows where norm_zweck is still empty/NaN
|
||||
mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
|
||||
raisenow.loc[mask, 'norm_zweck'] = (
|
||||
raisenow.loc[mask, 'raisenow_parameters.product.source_url']
|
||||
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
|
||||
)
|
||||
|
||||
# --- return raw tables if requested ---
|
||||
if table_name == 'stripe_import':
|
||||
return stripe.dropna(axis=1, how='all')
|
||||
@@ -96,6 +111,26 @@ def get_merged_df(table_name):
|
||||
pairs = []
|
||||
# index Raisenow rows for fast lookup + dropping
|
||||
rr = raisenow.set_index('idx_raisenow')
|
||||
for _, s in stripe.iterrows():
|
||||
# filter candidates by amount & name
|
||||
cand = rr[
|
||||
(rr['norm_amount'] == s['norm_amount']) &
|
||||
(rr['norm_name'] == s['norm_name'])
|
||||
].copy()
|
||||
if cand.empty:
|
||||
continue
|
||||
# compute absolute date difference (days only)
|
||||
date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
|
||||
exact_cand = cand[date_diff == pd.Timedelta(0)]
|
||||
if not exact_cand.empty:
|
||||
# pick the first exact match
|
||||
best = exact_cand.index[0]
|
||||
pairs.append((int(s['idx_stripe']), int(best)))
|
||||
rr = rr.drop(best)
|
||||
# --- 1) Greedy exact same-day matches ---
|
||||
pairs = []
|
||||
# index Raisenow rows for fast lookup + dropping
|
||||
rr = raisenow.set_index('idx_raisenow')
|
||||
for _, s in stripe.iterrows():
|
||||
# filter candidates by amount & name
|
||||
cand = rr[
|
||||
@@ -146,6 +181,40 @@ def get_merged_df(table_name):
|
||||
|
||||
combined = pd.DataFrame(merged_rows)
|
||||
|
||||
# --- slice out the requested view ---
|
||||
# --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
|
||||
used_stripe = {s for s, _ in pairs}
|
||||
stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy()
|
||||
for _, s in stripe_left.iterrows():
|
||||
cand = rr[
|
||||
(rr['norm_amount'] == s['norm_amount']) &
|
||||
(rr['norm_name'] == s['norm_name'])
|
||||
].copy()
|
||||
if cand.empty:
|
||||
continue
|
||||
date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
|
||||
cand = cand[date_diff <= pd.Timedelta(days=1)]
|
||||
if cand.empty:
|
||||
continue
|
||||
# pick the one with the smallest gap
|
||||
best = date_diff.idxmin()
|
||||
pairs.append((int(s['idx_stripe']), int(best)))
|
||||
rr = rr.drop(best)
|
||||
|
||||
# --- build the merged DataFrame without suffixes ---
|
||||
merged_rows = []
|
||||
for s_idx, r_idx in pairs:
|
||||
srow = stripe.loc[s_idx].to_dict()
|
||||
rrow = raisenow.loc[r_idx].to_dict()
|
||||
# drop any overlapping keys so we never get suffixes
|
||||
for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']:
|
||||
rrow.pop(k, None)
|
||||
# now combine so stripe values win for those keys, and raisenow adds its own columns
|
||||
merged = {**srow, **rrow}
|
||||
merged_rows.append(merged)
|
||||
|
||||
combined = pd.DataFrame(merged_rows)
|
||||
|
||||
# --- slice out the requested view ---
|
||||
if table_name == 'merged':
|
||||
result = combined
|
||||
@@ -224,6 +293,16 @@ def download():
|
||||
'raisenow_only'
|
||||
]
|
||||
}
|
||||
sheets = {
|
||||
name: get_merged_df(name)
|
||||
for name in [
|
||||
'stripe_import',
|
||||
'raiseNow_import',
|
||||
'merged',
|
||||
'stripe_only',
|
||||
'raisenow_only'
|
||||
]
|
||||
}
|
||||
|
||||
output = BytesIO()
|
||||
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user