Compare commits
No commits in common. "ee3f5c062c053ed3e1bce0d519177582371cd559" and "67788a668026d2e47dcd3d291c82f4a39c013d02" have entirely different histories.
ee3f5c062c
...
67788a6680
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,2 @@
|
|||||||
app/__pycache__/
|
app/__pycache__/
|
||||||
app/.flask_session/
|
|
||||||
.env
|
.env
|
||||||
|
|||||||
BIN
app/.flask_session/2029240f6d1128be89ddc32729463129
Normal file
BIN
app/.flask_session/2029240f6d1128be89ddc32729463129
Normal file
Binary file not shown.
220
app/app.py
220
app/app.py
@ -5,13 +5,24 @@ from io import BytesIO
|
|||||||
from flask_session import Session
|
from flask_session import Session
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.secret_key = "gfbierpf934hftrntr45otgß45890tfh34gft45rw" # replace with a secure random key
|
app.secret_key = "your-secret-key" # replace with a secure random key
|
||||||
|
# Configure server-side session (filesystem) to avoid size limits in cookies
|
||||||
app.config['SESSION_TYPE'] = 'filesystem'
|
app.config['SESSION_TYPE'] = 'filesystem'
|
||||||
app.config['SESSION_FILE_DIR'] = './.flask_session/'
|
app.config['SESSION_FILE_DIR'] = './.flask_session/'
|
||||||
Session(app)
|
Session(app)
|
||||||
|
|
||||||
STRIPE_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
|
STRIPE_COLS = [
|
||||||
RAISENOW_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']
|
'Type','ID','Created','Description','Amount','Currency',
|
||||||
|
'Converted Amount','Fees','Net','Converted Currency',
|
||||||
|
'Customer Name','Customer Email','Details'
|
||||||
|
]
|
||||||
|
RAISENOW_COLS = [
|
||||||
|
'Identifikationsnummer','Erstellt','UTC-Offset','Status',
|
||||||
|
'Betrag','Währung','Übernommene Gebühren - Betrag',
|
||||||
|
'Übernommene Gebühren - Währung','Zahlungsmethode',
|
||||||
|
'Zahlungsanbieter','Vorname','Nachname','E-Mail-Adresse',
|
||||||
|
'custom_parameters.altruja_action_name','custom_parameters.altruja_custom1_code'
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_dataframe(key, cols):
|
def get_dataframe(key, cols):
|
||||||
@ -26,126 +37,92 @@ def get_dataframe(key, cols):
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
def get_merged_df(table_name):
|
def get_merged_df(table_name):
|
||||||
"""
|
"""
|
||||||
Return a DataFrame for the given table_name based on Stripe and Raisenow inputs,
|
Return a DataFrame for the given table_name based on stripe and raisenow inputs,
|
||||||
enforcing strict one-to-one matching with:
|
including a secondary merge for date tolerance of ±1 day.
|
||||||
- exact same-day matches first
|
|
||||||
- then ±1-day fuzzy matches
|
|
||||||
- no pandas merge suffixes at all
|
|
||||||
- all original columns (including Raisenow's norm_zweck) preserved
|
|
||||||
"""
|
"""
|
||||||
|
stripe_df = get_dataframe('stripe_import', STRIPE_COLS)
|
||||||
|
raisenow_df = get_dataframe('raiseNow_import', RAISENOW_COLS)
|
||||||
|
|
||||||
# --- load & normalize Stripe ---
|
# Normalize stripe
|
||||||
stripe = (
|
stripe_df = stripe_df.query("Type == 'Charge'")
|
||||||
get_dataframe('stripe_import', STRIPE_COLS)
|
stripe_df['norm_date'] = pd.to_datetime(stripe_df['Created'], format='%Y-%m-%d %H:%M')
|
||||||
.query("Type == 'Charge'")
|
stripe_df['norm_amount'] = stripe_df['Amount'].astype(str).str.replace(',', '.')
|
||||||
.copy()
|
stripe_df['norm_amount'] = stripe_df['norm_amount'].astype(float)
|
||||||
)
|
stripe_df['norm_email'] = stripe_df['Customer Email'].astype(str)
|
||||||
stripe['idx_stripe'] = stripe.index
|
stripe_df['norm_name'] = stripe_df.apply(
|
||||||
stripe['norm_date'] = pd.to_datetime(stripe['Created'], format='%Y-%m-%d %H:%M')
|
lambda x: x['Customer Name'] if x.get('Customer Name') else x['Details'],
|
||||||
stripe['norm_amount'] = stripe['Amount'].astype(str).str.replace(',', '.').astype(float)
|
|
||||||
stripe['norm_email'] = stripe['Customer Email'].astype(str)
|
|
||||||
stripe['norm_name'] = stripe.apply(
|
|
||||||
lambda r: r['Customer Name'] or r['Details'], axis=1
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- load & normalize Raisenow ---
|
|
||||||
raisenow = (
|
|
||||||
get_dataframe('raiseNow_import', RAISENOW_COLS)
|
|
||||||
.query("Zahlungsmethode != 'paypal'")
|
|
||||||
.query("Status == 'succeeded'")
|
|
||||||
.copy()
|
|
||||||
)
|
|
||||||
|
|
||||||
raisenow['idx_raisenow'] = raisenow.index
|
|
||||||
raisenow['norm_date'] = pd.to_datetime(raisenow['Erstellt'], format='%Y-%m-%d %H:%M')
|
|
||||||
raisenow['norm_amount'] = raisenow['Betrag'].astype(float)
|
|
||||||
raisenow['norm_email'] = raisenow['E-Mail-Adresse'].astype(str)
|
|
||||||
raisenow['norm_name'] = raisenow['Vorname'].astype(str) + ' ' + raisenow['Nachname'].astype(str)
|
|
||||||
|
|
||||||
# start with two‐step assignment
|
|
||||||
raisenow['norm_zweck'] = raisenow.apply(
|
|
||||||
lambda r: r.get('custom_parameters.altruja_action_name')
|
|
||||||
or r.get('custom_parameters.altruja_custom1_code'),
|
|
||||||
axis=1
|
axis=1
|
||||||
)
|
)
|
||||||
# additional assignment: build a mask of rows where norm_zweck is still empty/NaN
|
|
||||||
mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
|
# Normalize raisenow
|
||||||
raisenow.loc[mask, 'norm_zweck'] = (
|
raisenow_df = raisenow_df.query("Zahlungsmethode != 'paypal'")
|
||||||
raisenow.loc[mask, 'raisenow_parameters.product.source_url']
|
raisenow_df = raisenow_df.query("Status == 'succeeded'")
|
||||||
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
|
raisenow_df['norm_date'] = pd.to_datetime(raisenow_df['Erstellt'], format='%Y-%m-%d %H:%M')
|
||||||
|
raisenow_df['norm_amount'] = raisenow_df['Betrag'].astype(float)
|
||||||
|
raisenow_df['norm_name'] = (
|
||||||
|
raisenow_df['Vorname'].astype(str) + ' ' + raisenow_df['Nachname'].astype(str)
|
||||||
|
)
|
||||||
|
raisenow_df['norm_email'] = raisenow_df['E-Mail-Adresse'].astype(str)
|
||||||
|
raisenow_df['norm_zweck'] = raisenow_df.apply(
|
||||||
|
lambda x: x['custom_parameters.altruja_action_name']
|
||||||
|
if x.get('custom_parameters.altruja_action_name')
|
||||||
|
else x.get('custom_parameters.altruja_custom1_code'),
|
||||||
|
axis=1
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- return raw tables if requested ---
|
if table_name in ('stripe_import', 'raiseNow_import'):
|
||||||
if table_name == 'stripe_import':
|
df = stripe_df if table_name == 'stripe_import' else raisenow_df
|
||||||
return stripe.dropna(axis=1, how='all')
|
return df.dropna(axis=1, how='all')
|
||||||
if table_name == 'raiseNow_import':
|
|
||||||
return raisenow.dropna(axis=1, how='all')
|
|
||||||
|
|
||||||
# --- 1) Greedy exact same-day matches ---
|
# Exact merge
|
||||||
pairs = []
|
exact = pd.merge(
|
||||||
# index Raisenow rows for fast lookup + dropping
|
stripe_df,
|
||||||
rr = raisenow.set_index('idx_raisenow')
|
raisenow_df,
|
||||||
for _, s in stripe.iterrows():
|
on=['norm_amount', 'norm_name'],
|
||||||
# filter candidates by amount & name
|
how='outer',
|
||||||
cand = rr[
|
suffixes=('_stripe', '_raisenow'),
|
||||||
(rr['norm_amount'] == s['norm_amount']) &
|
indicator=True
|
||||||
(rr['norm_name'] == s['norm_name'])
|
)
|
||||||
].copy()
|
exact['date_diff'] = (
|
||||||
if cand.empty:
|
exact['norm_date_stripe'].dt.date - exact['norm_date_raisenow'].dt.date
|
||||||
continue
|
).abs()
|
||||||
# compute absolute date difference (days only)
|
|
||||||
date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
|
|
||||||
exact_cand = cand[date_diff == pd.Timedelta(0)]
|
|
||||||
if not exact_cand.empty:
|
|
||||||
# pick the first exact match
|
|
||||||
best = exact_cand.index[0]
|
|
||||||
pairs.append((int(s['idx_stripe']), int(best)))
|
|
||||||
rr = rr.drop(best)
|
|
||||||
|
|
||||||
# --- 2) Greedy fuzzy ±1-day matches on remaining rows ---
|
# Separate matches
|
||||||
used_stripe = {s for s, _ in pairs}
|
exact_matches = exact[(exact['_merge'] == 'both') & (exact['date_diff'] == pd.Timedelta(0))].copy()
|
||||||
stripe_left = stripe[~stripe['idx_stripe'].isin(used_stripe)].copy()
|
stripe_only = exact[exact['_merge'] == 'left_only'].copy()
|
||||||
for _, s in stripe_left.iterrows():
|
raisenow_only = exact[exact['_merge'] == 'right_only'].copy()
|
||||||
cand = rr[
|
|
||||||
(rr['norm_amount'] == s['norm_amount']) &
|
|
||||||
(rr['norm_name'] == s['norm_name'])
|
|
||||||
].copy()
|
|
||||||
if cand.empty:
|
|
||||||
continue
|
|
||||||
date_diff = (cand['norm_date'].dt.normalize() - s['norm_date'].normalize()).abs()
|
|
||||||
cand = cand[date_diff <= pd.Timedelta(days=1)]
|
|
||||||
if cand.empty:
|
|
||||||
continue
|
|
||||||
# pick the one with the smallest gap
|
|
||||||
best = date_diff.idxmin()
|
|
||||||
pairs.append((int(s['idx_stripe']), int(best)))
|
|
||||||
rr = rr.drop(best)
|
|
||||||
|
|
||||||
# --- build the merged DataFrame without suffixes ---
|
# Fuzzy merge within ±1 day for remaining
|
||||||
merged_rows = []
|
# Merge stripe_only with raisenow_only on name and amount
|
||||||
for s_idx, r_idx in pairs:
|
fuzzy = pd.merge(
|
||||||
srow = stripe.loc[s_idx].to_dict()
|
stripe_only.drop(columns=['_merge']),
|
||||||
rrow = raisenow.loc[r_idx].to_dict()
|
raisenow_only.drop(columns=['_merge']),
|
||||||
# drop any overlapping keys so we never get suffixes
|
on=['norm_amount', 'norm_name'],
|
||||||
for k in ['norm_amount','norm_name','norm_date','norm_email','idx_stripe']:
|
suffixes=('_stripe', '_raisenow')
|
||||||
rrow.pop(k, None)
|
)
|
||||||
# now combine so stripe values win for those keys, and raisenow adds its own columns
|
fuzzy['date_diff'] = (
|
||||||
merged = {**srow, **rrow}
|
fuzzy['norm_date_stripe'].dt.date - fuzzy['norm_date_raisenow'].dt.date
|
||||||
merged_rows.append(merged)
|
).abs()
|
||||||
|
fuzzy_matches = fuzzy[fuzzy['date_diff'] <= pd.Timedelta(days=1)].copy()
|
||||||
|
|
||||||
combined = pd.DataFrame(merged_rows)
|
# Combine exact and fuzzy
|
||||||
|
combined = pd.concat([exact_matches, fuzzy_matches], ignore_index=True)
|
||||||
|
combined = combined.drop(columns=['_merge', 'date_diff'], errors='ignore')
|
||||||
|
|
||||||
# --- slice out the requested view ---
|
# Determine outputs
|
||||||
if table_name == 'merged':
|
if table_name == 'merged':
|
||||||
result = combined
|
result = combined
|
||||||
elif table_name == 'stripe_only':
|
elif table_name == 'stripe_only':
|
||||||
used = {s for s, _ in pairs}
|
# Exclude those in combined
|
||||||
result = stripe[~stripe['idx_stripe'].isin(used)]
|
matched_stripe_ids = combined['<unique_id_column>_stripe'] if '<unique_id_column>_stripe' in combined else None
|
||||||
|
result = stripe_df[~stripe_df.index.isin(matched_stripe_ids)]
|
||||||
elif table_name == 'raisenow_only':
|
elif table_name == 'raisenow_only':
|
||||||
used = {r for _, r in pairs}
|
matched_raisenow_ids = combined['<unique_id_column>_raisenow'] if '<unique_id_column>_raisenow' in combined else None
|
||||||
result = raisenow[~raisenow['idx_raisenow'].isin(used)]
|
result = raisenow_df[~raisenow_df.index.isin(matched_raisenow_ids)]
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown table_name '{table_name}'")
|
raise ValueError(f"Unknown table_name '{table_name}'")
|
||||||
|
|
||||||
@ -171,6 +148,7 @@ def upload():
|
|||||||
raw = raw.dropna(how='all').dropna(axis=1, how='all')
|
raw = raw.dropna(how='all').dropna(axis=1, how='all')
|
||||||
raw = raw.astype(object).replace({np.nan: None})
|
raw = raw.astype(object).replace({np.nan: None})
|
||||||
cols = list(raw.columns)
|
cols = list(raw.columns)
|
||||||
|
|
||||||
if cols[:len(STRIPE_COLS)] == STRIPE_COLS:
|
if cols[:len(STRIPE_COLS)] == STRIPE_COLS:
|
||||||
key = 'stripe_import'
|
key = 'stripe_import'
|
||||||
dedupe_col = 'ID'
|
dedupe_col = 'ID'
|
||||||
@ -193,8 +171,10 @@ def upload():
|
|||||||
@app.route('/get_table')
|
@app.route('/get_table')
|
||||||
def get_table():
|
def get_table():
|
||||||
table = request.args.get('table')
|
table = request.args.get('table')
|
||||||
|
try:
|
||||||
df = get_merged_df(table)
|
df = get_merged_df(table)
|
||||||
|
except Exception as e:
|
||||||
|
return jsonify({'error': str(e)}), 400
|
||||||
|
|
||||||
df = df.astype(object).where(pd.notnull(df), None)
|
df = df.astype(object).where(pd.notnull(df), None)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
@ -205,35 +185,13 @@ def get_table():
|
|||||||
|
|
||||||
@app.route('/download')
|
@app.route('/download')
|
||||||
def download():
|
def download():
|
||||||
sheets = {
|
sheets = { name: get_merged_df(name)
|
||||||
name: get_merged_df(name)
|
for name in ['stripe_import','raiseNow_import','merged','stripe_only','raisenow_only'] }
|
||||||
for name in [
|
|
||||||
'stripe_import',
|
|
||||||
'raiseNow_import',
|
|
||||||
'merged',
|
|
||||||
'stripe_only',
|
|
||||||
'raisenow_only'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
output = BytesIO()
|
output = BytesIO()
|
||||||
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
|
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
|
||||||
workbook = writer.book
|
|
||||||
for name, df in sheets.items():
|
for name, df in sheets.items():
|
||||||
df.to_excel(writer, sheet_name=name, index=False)
|
df.to_excel(writer, sheet_name=name, index=False)
|
||||||
worksheet = writer.sheets[name]
|
|
||||||
|
|
||||||
# 1) Freeze header row
|
|
||||||
worksheet.freeze_panes(1, 0)
|
|
||||||
|
|
||||||
# 2) Autofilter on the header row across all columns
|
|
||||||
# (0,0) is the top-left cell; (len(df), len(df.columns)-1) covers all data rows
|
|
||||||
worksheet.autofilter(0, 0, df.shape[0], df.shape[1] - 1)
|
|
||||||
|
|
||||||
# 3) Set column widths to match first-row entries
|
|
||||||
first_row = df.iloc[0].astype(str)
|
|
||||||
for col_idx, cell_value in enumerate(first_row):
|
|
||||||
worksheet.set_column(col_idx, col_idx, len(cell_value) + 2)
|
|
||||||
|
|
||||||
output.seek(0)
|
output.seek(0)
|
||||||
return send_file(
|
return send_file(
|
||||||
|
|||||||
@ -2,4 +2,3 @@ Flask
|
|||||||
flask_session
|
flask_session
|
||||||
pandas
|
pandas
|
||||||
openpyxl
|
openpyxl
|
||||||
xlsxwriter
|
|
||||||
@ -70,6 +70,7 @@
|
|||||||
|
|
||||||
// error handling
|
// error handling
|
||||||
if (!resp.ok) {
|
if (!resp.ok) {
|
||||||
|
if (table) table.hideLoader();
|
||||||
return alert(json.error || 'Error loading');
|
return alert(json.error || 'Error loading');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user