fix file not matching

This commit is contained in:
lelo 2025-06-18 13:59:18 +02:00
parent c631e74f5d
commit 09d8838886

View File

@ -11,8 +11,8 @@ app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_FILE_DIR'] = './.flask_session/'
Session(app)
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount', 'Fees', 'Net', 'Converted Currency', 'Details']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung', 'Übernommene Gebühren - Betrag', 'Übernommene Gebühren - Währung', 'Zahlungsmethode', 'Zahlungsanbieter', 'Nettobetrag', 'Auszahlungswährung']
STRIPE_STARTING_COLS = ['Type', 'ID', 'Created', 'Description', 'Amount', 'Currency', 'Converted Amount']
RAISENOW_STARTING_COLS = ['Identifikationsnummer', 'Erstellt', 'UTC-Offset', 'Status', 'Betrag', 'Währung']
def get_dataframe(key):
@ -36,7 +36,7 @@ def get_merged_df(table_name): # return table_name: str
- no pandas merge suffixes at all
- all original columns (including Raisenow's norm_zweck) preserved
"""
print('calculated DataFrame')
print('calculate DataFrame...')
# --- load & normalize Stripe ---
stripe_import = get_dataframe('stripe_import')
@ -125,10 +125,11 @@ def get_merged_df(table_name): # return table_name: str
)
# additional assignment: build a mask of rows where norm_zweck is still empty/NaN
mask = raisenow['norm_zweck'].isna() | (raisenow['norm_zweck'] == '')
raisenow.loc[mask, 'norm_zweck'] = (
raisenow.loc[mask, 'raisenow_parameters.product.source_url']
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
)
if mask.any() and 'raisenow_parameters.product.source_url' in raisenow.columns:
raisenow.loc[mask, 'norm_zweck'] = (
raisenow.loc[mask, 'raisenow_parameters.product.source_url']
.str.extract(r'https?://[^/]+/([^/?#]+)')[0]
)
# --- return raw tables if requested ---
if table_name == 'stripe_import':
@ -343,10 +344,10 @@ def upload():
raw = (
pd.read_csv(f) if f.filename.lower().endswith('.csv') else pd.read_excel(f)
)
raw = raw.dropna(how='all').dropna(axis=1, how='all')
print('number of rows:', len(raw))
raw = raw.dropna(how='all')
raw = raw.astype(object).replace({np.nan: None})
cols = list(raw.columns)
if cols[:len(STRIPE_STARTING_COLS)] == STRIPE_STARTING_COLS:
key = 'stripe_import'
dedupe_col = 'ID'
@ -354,8 +355,12 @@ def upload():
key = 'raisenow_import'
dedupe_col = 'Identifikationsnummer'
else:
continue
print('file does not match expected formats:', f.filename)
return jsonify({
"status": "error",
"message": f"File '{f.filename}' does not match expected formats."
}), 400
existing = get_dataframe(key)
combined = pd.concat([existing, raw], ignore_index=True)
deduped = combined.drop_duplicates(subset=[dedupe_col], keep='first').reset_index(drop=True)