Template-Based CSV Import System with R2 Storage
Solution: Updated backend/r2_storage.py:
- Added ALLOWED_CSV_TYPES for CSV file validation
- Added upload_bytes() method for uploading raw bytes to R2
- Added download_file() method for retrieving files from R2
- Added delete_multiple() method for bulk file deletion
Comprehensive upload endpoint now stores CSVs in R2:
r2_storage = get_r2_storage()
for file_type, (content, filename) in file_contents.items():
_, r2_key, _ = await r2_storage.upload_bytes(
content=content,
folder=f"imports/{job_id}",
filename=f"{file_type}_{filename}",
content_type='text/csv'
)
r2_keys[file_type] = r2_key
---
2. Stripe Transaction ID Tracking
Solution: Updated subscription and donation imports to capture Stripe metadata:
Subscription fields:
- stripe_subscription_id
- stripe_customer_id
- stripe_payment_intent_id
- stripe_invoice_id
- stripe_charge_id
- stripe_receipt_url
- card_last4, card_brand, payment_method
Donation fields:
- stripe_payment_intent_id
- stripe_charge_id
- stripe_receipt_url
- card_last4, card_brand
---
3. Fixed JSON Serialization Error
Problem: Object of type datetime is not JSON serializable when saving import metadata.
Solution: Added serialize_for_json() helper in backend/server.py:
def serialize_for_json(obj):
"""Recursively convert datetime objects to ISO strings for JSON serialization."""
if isinstance(obj, (datetime, date)):
return obj.isoformat()
elif isinstance(obj, dict):
return {k: serialize_for_json(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [serialize_for_json(item) for item in obj]
# ... handles other types
---
4. Fixed Route Ordering (401 Unauthorized)
Problem: /admin/import/comprehensive/upload returned 401 because FastAPI matched "comprehensive" as a {job_id} parameter.
Solution: Moved comprehensive import routes BEFORE generic {job_id} routes in backend/server.py:
# Correct order:
@app.post("/api/admin/import/comprehensive/upload") # Specific route FIRST
# ... other comprehensive routes ...
@app.get("/api/admin/import/{job_id}/preview") # Generic route AFTER
---
5. Improved Date Parsing
Solution: Added additional date formats to backend/wordpress_parser.py:
formats = [
'%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%B %d, %Y', '%b %d, %Y',
'%Y-%m-%d %H:%M:%S',
'%m/%Y', # Month/Year: 01/2020
'%m-%Y', # Month-Year: 01-2020
'%b-%Y', # Short month-Year: Jan-2020
'%B-%Y', # Full month-Year: January-2020
]
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
1133
import_templates.py
Normal file
1133
import_templates.py
Normal file
File diff suppressed because it is too large
Load Diff
129
r2_storage.py
129
r2_storage.py
@@ -50,6 +50,14 @@ class R2Storage:
|
||||
'image/svg+xml': ['.svg']
|
||||
}
|
||||
|
||||
# CSV files for imports.
# Accepted MIME type -> valid extension(s) for CSV upload validation;
# browsers and OSes disagree on the MIME type they report for .csv files,
# so several aliases are accepted.
ALLOWED_CSV_TYPES = {
    'text/csv': ['.csv'],
    'text/plain': ['.csv'],  # Some systems report CSV as text/plain
    'application/csv': ['.csv'],
    'application/vnd.ms-excel': ['.csv'],  # Old Excel type sometimes used for CSV
}
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize R2 client with credentials from environment"""
|
||||
self.account_id = os.getenv('R2_ACCOUNT_ID')
|
||||
@@ -240,6 +248,127 @@ class R2Storage:
|
||||
except ClientError:
|
||||
return False
|
||||
|
||||
async def upload_bytes(
    self,
    content: bytes,
    folder: str,
    filename: str,
    content_type: str = 'text/csv'
) -> tuple[str, str, int]:
    """Store raw in-memory bytes in R2 under a uniquely-named key.

    Useful for CSV imports where the payload is already in memory.

    Args:
        content: Raw bytes to upload.
        folder: Destination folder in the bucket (e.g. 'imports/job-id').
        filename: Original filename; only its extension is preserved.
        content_type: MIME type recorded on the stored object.

    Returns:
        tuple: (public_url, object_key, file_size_bytes)

    Raises:
        HTTPException: With status 500 if the upload fails.
    """
    try:
        size = len(content)

        # Key = <folder>/<uuid><ext>: the UUID avoids collisions while the
        # original extension (defaulting to .csv) is kept for readability.
        extension = Path(filename).suffix.lower() or '.csv'
        object_key = f"{folder}/{uuid.uuid4()}{extension}"

        self.client.put_object(
            Bucket=self.bucket_name,
            Key=object_key,
            Body=content,
            ContentType=content_type,
            ContentLength=size
        )

        return self.get_public_url(object_key), object_key, size

    except ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload to R2: {str(e)}"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Upload error: {str(e)}"
        )
|
||||
|
||||
async def download_file(self, object_key: str) -> bytes:
    """Fetch an object's full content from R2 storage.

    Args:
        object_key: The S3 object key (path) of the file.

    Returns:
        bytes: The file content.

    Raises:
        HTTPException: 404 when the key does not exist, 500 for any
            other storage or unexpected failure.
    """
    try:
        result = self.client.get_object(
            Bucket=self.bucket_name,
            Key=object_key
        )
        return result['Body'].read()

    except ClientError as e:
        # Distinguish a missing object from other storage failures.
        if e.response['Error']['Code'] == 'NoSuchKey':
            raise HTTPException(status_code=404, detail="File not found in storage")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to download file from R2: {str(e)}"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Download error: {str(e)}"
        )
|
||||
|
||||
async def delete_multiple(self, object_keys: list[str]) -> bool:
    """Delete multiple files from R2 storage.

    Falsy keys (None, '') are skipped; an empty input is a successful
    no-op. Keys are sent in batches of 1000, the maximum the S3/R2
    delete_objects API accepts per request (the previous version sent
    everything in one call and would fail on larger batches).

    Args:
        object_keys: List of S3 object keys to delete.

    Returns:
        bool: True if successful.

    Raises:
        HTTPException: With status 500 if deletion fails.
    """
    if not object_keys:
        return True

    try:
        targets = [{'Key': key} for key in object_keys if key]

        # delete_objects accepts at most 1000 keys per request, so chunk.
        for start in range(0, len(targets), 1000):
            self.client.delete_objects(
                Bucket=self.bucket_name,
                Delete={'Objects': targets[start:start + 1000]}
            )
        return True

    except ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete files from R2: {str(e)}"
        )
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_r2_storage = None
|
||||
|
||||
@@ -10,21 +10,127 @@ Key Features:
|
||||
- Validate and standardize user data (DOB, phone numbers)
|
||||
- Generate smart status suggestions based on approval and subscription data
|
||||
- Comprehensive data quality analysis and error reporting
|
||||
- Multi-file import support (Users, Members, Payments CSVs)
|
||||
- Field mapping based on Meta Name Reference document
|
||||
|
||||
Author: Claude Code
|
||||
Date: 2025-12-24
|
||||
Updated: 2026-02-03 - Added comprehensive multi-file import support
|
||||
"""
|
||||
|
||||
import csv
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
import phpserialize
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Meta Name Reference Field Mapping (from client's WordPress export)
|
||||
# ============================================================================
|
||||
|
||||
# Maps WordPress meta names to our database fields
|
||||
# Format: 'wordpress_meta_name': ('db_field', 'field_type', 'parser_function')
|
||||
META_FIELD_MAPPING = {
    # field_type selects the parsing branch in transform_csv_row_to_user_data();
    # the parser string selects the specific parser within that branch.

    # Basic user info
    'first_name': ('first_name', 'string', None),
    'last_name': ('last_name', 'string', None),
    'user_email': ('email', 'string', 'lowercase'),
    'user_login': ('username', 'string', None),  # For reference only
    'address': ('address', 'string', None),
    'city': ('city', 'string', None),
    'state': ('state', 'string', None),
    'zipcode': ('zipcode', 'string', None),
    'cell_phone': ('phone', 'string', 'phone'),
    'date_of_birth': ('date_of_birth', 'date', 'date_mmddyyyy'),

    # Partner info
    'partner_first_name': ('partner_first_name', 'string', None),
    'partner_last_name': ('partner_last_name', 'string', None),
    'partner_membership_status': ('partner_is_member', 'boolean', 'yes_no'),
    'partner_membership_consideration': ('partner_plan_to_become_member', 'boolean', 'yes_no'),

    # Newsletter preferences
    'newsletter_consent': ('newsletter_subscribed', 'boolean', 'yes_no'),
    'newsletter_checklist': ('newsletter_preferences', 'multi_value', 'newsletter_checklist'),

    # Referral and lead sources
    'member_referral': ('referred_by_member_name', 'string', None),
    'referral_source': ('lead_sources', 'multi_value', 'lead_sources'),

    # Volunteer interests
    'volunteer_checklist': ('volunteer_interests', 'multi_value', 'volunteer_checklist'),

    # Scholarship
    'scholarship_request': ('scholarship_requested', 'boolean', 'yes_no'),
    'scholarship_reason': ('scholarship_reason', 'string', None),

    # Directory settings
    'members_directory_filter': ('show_in_directory', 'boolean', 'yes_no'),
    # 'custom' type: stored inside the custom_registration_data JSON blob,
    # not as a top-level User field.
    'md_display_name': ('custom_registration_data.directory_display_name', 'custom', None),
    'md_email': ('directory_email', 'string', None),
    'description': ('directory_bio', 'string', None),
    'md_adress': ('directory_address', 'string', None),  # Note: typo in WordPress
    'md_phone': ('directory_phone', 'string', None),
    'md_dob': ('directory_dob', 'date', 'date_mmddyyyy'),
    'md_partner_name': ('directory_partner_name', 'string', None),
    'md_avatar': ('profile_photo_url', 'string', None),

    # Metadata
    'member_since': ('member_since', 'date', 'date_various'),
    'user_registered': ('wordpress_registered_date', 'datetime', 'datetime_mysql'),
    'ID': ('wordpress_user_id', 'integer', None),

    # Stripe info (from WordPress)
    'pms_stripe_customer_id': ('stripe_customer_id', 'string', None),
}

# Newsletter checklist option mapping: substring matched (case-insensitive)
# against raw checklist entries -> our newsletter_publish_* flag name.
NEWSLETTER_CHECKLIST_OPTIONS = {
    'name': 'newsletter_publish_name',
    'photo': 'newsletter_publish_photo',
    'birthday': 'newsletter_publish_birthday',
    'none': 'newsletter_publish_none',
    # Handle various WordPress stored formats
    'my name': 'newsletter_publish_name',
    'my photo': 'newsletter_publish_photo',
    'my birthday': 'newsletter_publish_birthday',
}

# Volunteer interests mapping (WordPress values to our format).
# Matched by substring, so the longer "help with ..." variants also hit
# their short keys; both map to the same label.
VOLUNTEER_INTERESTS_MAP = {
    'events': 'Events',
    'fundraising': 'Fundraising',
    'communications': 'Communications',
    'membership': 'Membership',
    'board': 'Board of Directors',
    'other': 'Other',
    # Handle various WordPress formats
    'help with events': 'Events',
    'help with fundraising': 'Fundraising',
    'help with communications': 'Communications',
    'help with membership': 'Membership',
    'serve on the board': 'Board of Directors',
}

# Lead sources mapping: substring matched against referral_source tokens;
# several synonyms collapse onto one canonical label.
LEAD_SOURCES_MAP = {
    'current member': 'Current member',
    'friend': 'Friend',
    'outsmart magazine': 'OutSmart Magazine',
    'outsmart': 'OutSmart Magazine',
    'search engine': 'Search engine (Google etc.)',
    'google': 'Search engine (Google etc.)',
    'known about loaf': "I've known about LOAF for a long time",
    'long time': "I've known about LOAF for a long time",
    'other': 'Other',
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# WordPress Role Mapping Configuration
|
||||
# ============================================================================
|
||||
@@ -283,6 +389,622 @@ def validate_dob(dob_str: str) -> Tuple[Optional[datetime], Optional[str]]:
|
||||
return None, f'Invalid date format: {dob_str} (expected MM/DD/YYYY)'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Enhanced Field Parsers for Meta Name Reference
|
||||
# ============================================================================
|
||||
|
||||
def parse_boolean_yes_no(value: Any) -> bool:
    """Interpret a WordPress-style truthy value as a boolean.

    Recognizes (case-insensitively): yes, true, 1, checked, on, y.
    None, NaN, and every other value are treated as False.
    """
    # Missing values (None or a pandas NaN float) are never truthy.
    if value is None:
        return False
    if isinstance(value, float) and pd.isna(value):
        return False

    normalized = str(value).lower().strip()
    return normalized in {'yes', 'true', '1', 'checked', 'on', 'y'}
|
||||
|
||||
|
||||
def parse_date_various(date_str: Any) -> Optional[datetime]:
    """Best-effort date parser for WordPress export values.

    Tries a fixed list of common formats (US, ISO, EU, long/short month
    names, MySQL datetimes, and month/year-only values) and returns the
    first match whose year falls in a plausible range; None otherwise.
    """
    if date_str is None:
        return None
    if isinstance(date_str, float) and pd.isna(date_str):
        return None

    text = str(date_str).strip()
    if not text or text.lower() == 'nan':
        return None

    known_formats = (
        '%m/%d/%Y',           # US: 01/15/2020
        '%Y-%m-%d',           # ISO: 2020-01-15
        '%d/%m/%Y',           # EU: 15/01/2020
        '%B %d, %Y',          # Full: January 15, 2020
        '%b %d, %Y',          # Short: Jan 15, 2020
        '%Y-%m-%d %H:%M:%S',  # MySQL datetime
        '%m/%Y',              # Month/Year: 01/2020
        '%m-%Y',              # Month-Year: 01-2020
        '%b-%Y',              # Short month-Year: Jan-2020
        '%B-%Y',              # Full month-Year: January-2020
    )

    max_year = datetime.now().year + 1
    for fmt in known_formats:
        try:
            candidate = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Reject implausible years; a mismatch here just means we try
        # the next format rather than accepting a garbage parse.
        if 1900 <= candidate.year <= max_year:
            return candidate

    # Only log for values long enough to plausibly have been dates.
    if text and len(text) > 3:
        logger.debug(f"Could not parse date: {text}")
    return None
|
||||
|
||||
|
||||
def parse_datetime_mysql(dt_str: Any) -> Optional[datetime]:
    """Parse a MySQL-style datetime (YYYY-MM-DD HH:MM:SS).

    Values in any other shape are delegated to parse_date_various().
    """
    if dt_str is None:
        return None
    if isinstance(dt_str, float) and pd.isna(dt_str):
        return None

    try:
        return datetime.strptime(str(dt_str).strip(), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Not MySQL-shaped; fall back to the multi-format parser.
        return parse_date_various(dt_str)
|
||||
|
||||
|
||||
def parse_newsletter_checklist(value: Any) -> Dict[str, bool]:
    """Map a WordPress newsletter checklist value onto publish flags.

    The raw value may be a PHP-serialized array or a comma-separated
    string; entries are matched by substring against
    NEWSLETTER_CHECKLIST_OPTIONS.

    Returns a dict of newsletter_publish_* booleans (all False by default).
    """
    flags = {
        'newsletter_publish_name': False,
        'newsletter_publish_photo': False,
        'newsletter_publish_birthday': False,
        'newsletter_publish_none': False,
    }

    if value is None or (isinstance(value, float) and pd.isna(value)):
        return flags

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return flags

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, flag_name in NEWSLETTER_CHECKLIST_OPTIONS.items():
                        if needle in key:
                            flags[flag_name] = True
            return flags
        except Exception:
            pass

    # Otherwise treat the value as a comma-separated list.
    for token in (part.strip().lower() for part in text.split(',')):
        for needle, flag_name in NEWSLETTER_CHECKLIST_OPTIONS.items():
            if needle in token:
                flags[flag_name] = True

    return flags
|
||||
|
||||
|
||||
def parse_volunteer_checklist(value: Any) -> List[str]:
    """Extract standardized volunteer-interest labels from a raw value.

    Accepts PHP-serialized arrays or comma-separated strings; entries are
    matched by substring against VOLUNTEER_INTERESTS_MAP.

    Returns a de-duplicated list of labels (empty for missing input).
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return []

    labels: List[str] = []

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, label in VOLUNTEER_INTERESTS_MAP.items():
                        if needle in key and label not in labels:
                            labels.append(label)
            return labels
        except Exception:
            pass

    # Otherwise treat the value as a comma-separated list.
    for token in (part.strip().lower() for part in text.split(',')):
        for needle, label in VOLUNTEER_INTERESTS_MAP.items():
            if needle in token and label not in labels:
                labels.append(label)

    return labels
|
||||
|
||||
|
||||
def parse_lead_sources(value: Any) -> List[str]:
    """Extract standardized lead-source labels from a raw value.

    Accepts PHP-serialized arrays or comma-separated strings; entries are
    matched by substring against LEAD_SOURCES_MAP. In the comma-separated
    path, any token that records no new label is filed under 'Other'.
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return []

    found: List[str] = []

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, label in LEAD_SOURCES_MAP.items():
                        if needle in key and label not in found:
                            found.append(label)
            return found
        except Exception:
            pass

    # Comma-separated fallback: the first map entry whose key appears in
    # the token (and whose label was not yet recorded) wins; a token that
    # records nothing — including a repeat of an already-seen source —
    # falls through to 'Other'.
    for token in (part.strip().lower() for part in text.split(',')):
        recorded = False
        for needle, label in LEAD_SOURCES_MAP.items():
            if needle in token and label not in found:
                found.append(label)
                recorded = True
                break
        if not recorded and token:
            found.append('Other')

    return found
|
||||
|
||||
|
||||
def transform_csv_row_to_user_data(row: Dict[str, Any], existing_emails: Optional[set] = None) -> Dict[str, Any]:
    """
    Transform a CSV row to user data dictionary using Meta Name Reference mapping.

    Each column listed in META_FIELD_MAPPING is parsed according to its
    declared field_type/parser; per-field parse failures are downgraded to
    warnings so one bad cell never rejects the whole row. Only a missing
    or already-existing email produces an error.

    Args:
        row: Dictionary of CSV column values
        existing_emails: Set of emails already in database (for duplicate check)

    Returns:
        Dictionary with:
        - user_data: Fields that map to User model
        - custom_data: Fields for custom_registration_data JSON
        - newsletter_prefs: Newsletter preference booleans
        - warnings: List of warning messages (non-fatal issues)
        - errors: List of error messages (row should not be imported)
    """
    user_data = {}
    custom_data = {}
    newsletter_prefs = {}
    warnings = []
    errors = []

    # Process each mapped field
    for csv_field, (db_field, field_type, parser) in META_FIELD_MAPPING.items():
        value = row.get(csv_field)

        # Skip if no value (None or a pandas NaN float)
        if value is None or (isinstance(value, float) and pd.isna(value)):
            continue

        try:
            # Parse based on field type
            if field_type == 'string':
                if parser == 'lowercase':
                    parsed_value = str(value).strip().lower()
                elif parser == 'phone':
                    parsed_value = standardize_phone(value)
                    # '0000000000' is standardize_phone's sentinel for an
                    # unusable number — keep it but warn.
                    if parsed_value == '0000000000':
                        warnings.append(f'Invalid phone: {value}')
                else:
                    parsed_value = str(value).strip() if value else None

            elif field_type == 'integer':
                parsed_value = int(value) if value else None

            elif field_type == 'boolean':
                parsed_value = parse_boolean_yes_no(value)

            elif field_type == 'date':
                if parser == 'date_mmddyyyy':
                    # validate_dob returns (datetime_or_None, warning_or_None)
                    parsed_value, warning = validate_dob(value)
                    if warning:
                        warnings.append(warning)
                else:
                    parsed_value = parse_date_various(value)

            elif field_type == 'datetime':
                parsed_value = parse_datetime_mysql(value)

            elif field_type == 'multi_value':
                if parser == 'newsletter_checklist':
                    # Returned as a dict of flags, collected separately
                    # from user_data.
                    newsletter_prefs = parse_newsletter_checklist(value)
                    continue  # Handled separately
                elif parser == 'volunteer_checklist':
                    parsed_value = parse_volunteer_checklist(value)
                elif parser == 'lead_sources':
                    parsed_value = parse_lead_sources(value)
                else:
                    parsed_value = [str(value)]

            elif field_type == 'custom':
                # Store in custom_registration_data (db_field is prefixed
                # with 'custom_registration_data.' in the mapping).
                custom_field = db_field.replace('custom_registration_data.', '')
                custom_data[custom_field] = str(value).strip() if value else None
                continue

            else:
                # Unknown field_type: pass the raw value through unchanged.
                parsed_value = value

            # Store in appropriate location
            if parsed_value is not None:
                user_data[db_field] = parsed_value

        except Exception as e:
            # Field-level failures are warnings, not errors, so one bad
            # cell does not block the rest of the row.
            warnings.append(f'Error parsing {csv_field}: {str(e)}')

    # Check for required fields
    if not user_data.get('email'):
        errors.append('Missing email address')
    elif existing_emails and user_data['email'] in existing_emails:
        errors.append('Email already exists in database')

    if not user_data.get('first_name'):
        warnings.append('Missing first name')

    if not user_data.get('last_name'):
        warnings.append('Missing last name')

    return {
        'user_data': user_data,
        'custom_data': custom_data,
        'newsletter_prefs': newsletter_prefs,
        'warnings': warnings,
        'errors': errors
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Members CSV Parser (Subscription Data)
|
||||
# ============================================================================
|
||||
|
||||
def parse_members_csv(file_path: str) -> Dict[str, Any]:
    """Parse a WordPress PMS members export into per-email subscription data.

    Args:
        file_path: Path to pms-export-members CSV file

    Returns:
        Dict keyed by lowercased user_email; each value holds the plan,
        parsed start/end dates, mapped status, payment method, and
        billing details for that member.

    Raises:
        Exception: Re-raised (after logging) if the CSV cannot be parsed.
    """
    subscriptions: Dict[str, Any] = {}

    # WordPress subscription status -> our status vocabulary; anything
    # unrecognized defaults to 'active'.
    status_map = {
        'active': 'active',
        'expired': 'expired',
        'abandoned': 'expired',
        'canceled': 'cancelled',
        'cancelled': 'cancelled',
    }

    try:
        frame = pd.read_csv(file_path)

        for _, record in frame.iterrows():
            email = str(record.get('user_email', '')).strip().lower()
            # Skip rows without an email ('nan' is pandas' missing value
            # rendered through str()).
            if not email or email == 'nan':
                continue

            raw_status = str(record.get('status', '')).lower().strip()
            mapped_status = status_map.get(raw_status, 'active')

            # Normalize the payment gateway to a small known set.
            gateway = str(record.get('payment_gateway', '')).lower().strip()
            if 'stripe' in gateway:
                method = 'stripe'
            elif 'paypal' in gateway:
                method = 'paypal'
            elif gateway in ('manual', 'admin', ''):
                method = 'manual'
            else:
                method = gateway or 'manual'

            subscriptions[email] = {
                'subscription_plan_id': record.get('subscription_plan_id'),
                'subscription_plan_name': record.get('subscription_plan_name'),
                'start_date': parse_date_various(record.get('start_date')),
                'end_date': parse_date_various(record.get('expiration_date')),
                'status': mapped_status,
                'payment_method': method,
                'wordpress_user_id': record.get('user_id'),
                'billing_first_name': record.get('billing_first_name'),
                'billing_last_name': record.get('billing_last_name'),
                'billing_address': record.get('billing_address'),
                'billing_city': record.get('billing_city'),
                'billing_state': record.get('billing_state'),
                'billing_zip': record.get('billing_zip'),
                'card_last4': record.get('billing_card_last4'),
            }

    except Exception as e:
        logger.error(f"Error parsing members CSV: {str(e)}")
        raise

    return subscriptions
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Payments CSV Parser (Payment History)
|
||||
# ============================================================================
|
||||
|
||||
def parse_payments_csv(file_path: str) -> Dict[str, List[Dict]]:
    """Parse a WordPress PMS payments export into per-email payment history.

    Args:
        file_path: Path to pms-export-payments CSV file

    Returns:
        Dict keyed by lowercased user_email; each value is a list of
        payment records with amounts in cents and mapped statuses.

    Raises:
        Exception: Re-raised (after logging) if the CSV cannot be parsed.
    """
    history: Dict[str, List[Dict]] = {}

    # WordPress payment status -> our status vocabulary; anything
    # unrecognized defaults to 'completed' (historical data).
    status_map = {
        'completed': 'completed',
        'pending': 'pending',
        'processing': 'pending',
        'failed': 'failed',
        'refunded': 'failed',
    }

    try:
        frame = pd.read_csv(file_path)

        for _, record in frame.iterrows():
            email = str(record.get('user_email', '')).strip().lower()
            if not email or email == 'nan':
                continue

            # Amounts arrive as display strings ("$1,234.56"); store cents.
            raw_amount = str(record.get('amount', '0')).replace('$', '').replace(',', '').strip()
            try:
                cents = int(float(raw_amount) * 100)
            except (ValueError, TypeError):
                cents = 0

            raw_status = str(record.get('status', '')).lower().strip()

            history.setdefault(email, []).append({
                'payment_id': record.get('payment_id'),
                'amount_cents': cents,
                'status': status_map.get(raw_status, 'completed'),
                'date': parse_date_various(record.get('date')),
                'payment_gateway': record.get('payment_gateway'),
                'transaction_id': record.get('transaction_id'),
                'profile_id': record.get('profile_id'),
                'subscription_plan_id': record.get('subscription_plan_id'),
                'wordpress_user_id': record.get('user_id'),
            })

    except Exception as e:
        logger.error(f"Error parsing payments CSV: {str(e)}")
        raise

    return history
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Comprehensive Import Analysis
|
||||
# ============================================================================
|
||||
|
||||
def analyze_comprehensive_import(
    users_csv_path: str,
    members_csv_path: Optional[str] = None,
    payments_csv_path: Optional[str] = None,
    existing_emails: Optional[set] = None
) -> Dict[str, Any]:
    """
    Analyze all CSV files for comprehensive import with cross-referencing.

    Members and payments CSVs (if given) are parsed first; each user row
    is then transformed and cross-referenced against them by lowercased
    email address. Failures in the optional CSVs are recorded in the
    result ('error' keys); a failure in the users CSV is re-raised.

    Args:
        users_csv_path: Path to WordPress users export CSV (required)
        members_csv_path: Path to PMS members CSV (optional)
        payments_csv_path: Path to PMS payments CSV (optional)
        existing_emails: Set of emails already in database

    Returns:
        Comprehensive analysis with preview data for all files
    """
    if existing_emails is None:
        existing_emails = set()

    result = {
        'users': {'total': 0, 'valid': 0, 'warnings': 0, 'errors': 0, 'preview': []},
        'members': {'total': 0, 'matched': 0, 'unmatched': 0, 'data': {}},
        'payments': {'total': 0, 'matched': 0, 'total_amount_cents': 0, 'data': {}},
        'summary': {
            'total_users': 0,
            'importable_users': 0,
            'duplicate_emails': 0,
            'users_with_subscriptions': 0,
            'users_with_payments': 0,
            'total_payment_amount': 0,
        }
    }

    # Parse members CSV if provided (non-fatal on failure)
    members_data = {}
    if members_csv_path:
        try:
            members_data = parse_members_csv(members_csv_path)
            result['members']['total'] = len(members_data)
            result['members']['data'] = members_data
        except Exception as e:
            result['members']['error'] = str(e)

    # Parse payments CSV if provided (non-fatal on failure)
    payments_data = {}
    if payments_csv_path:
        try:
            payments_data = parse_payments_csv(payments_csv_path)
            result['payments']['total'] = sum(len(p) for p in payments_data.values())
            result['payments']['data'] = payments_data
            result['payments']['total_amount_cents'] = sum(
                sum(p['amount_cents'] for p in payments)
                for payments in payments_data.values()
            )
        except Exception as e:
            result['payments']['error'] = str(e)

    # Parse users CSV (required; failures propagate to the caller)
    try:
        df = pd.read_csv(users_csv_path)
        result['users']['total'] = len(df)

        seen_emails = set()
        total_warnings = 0
        total_errors = 0

        for idx, row in df.iterrows():
            row_dict = row.to_dict()
            transformed = transform_csv_row_to_user_data(row_dict, existing_emails)

            email = transformed['user_data'].get('email', '').lower()

            # Check for CSV duplicates (within this file; database
            # duplicates are flagged by transform_csv_row_to_user_data)
            if email in seen_emails:
                transformed['errors'].append(f'Duplicate email in CSV')
            elif email:
                seen_emails.add(email)

            # Cross-reference with members data
            subscription_data = members_data.get(email)
            if subscription_data:
                result['members']['matched'] += 1

            # Cross-reference with payments data
            payment_records = payments_data.get(email, [])
            if payment_records:
                result['payments']['matched'] += 1

            # Parse WordPress roles for role/status suggestion
            wp_capabilities = row.get('wp_capabilities', '')
            wp_roles = parse_php_serialized(wp_capabilities)
            loaf_role, role_status = map_wordpress_role(wp_roles)

            # Determine status.
            # NOTE(review): 'pms_subscription_plan_63' is a hard-coded
            # role slug — presumably plan 63 is the paid membership plan
            # on the source site; confirm before reusing elsewhere.
            approval_status = str(row.get('wppb_approval_status', '')).strip()
            has_subscription = 'pms_subscription_plan_63' in wp_roles or subscription_data is not None

            # A status derived from the WordPress role takes precedence
            # over the approval/subscription heuristic.
            if role_status:
                suggested_status = role_status
            else:
                suggested_status = suggest_status(approval_status, has_subscription, loaf_role)

            # Build preview row
            preview_row = {
                'row_number': idx + 1,  # 1-based for display
                'email': email,
                'first_name': transformed['user_data'].get('first_name', ''),
                'last_name': transformed['user_data'].get('last_name', ''),
                'phone': transformed['user_data'].get('phone', ''),
                'date_of_birth': transformed['user_data'].get('date_of_birth').isoformat() if transformed['user_data'].get('date_of_birth') else None,
                'wordpress_user_id': transformed['user_data'].get('wordpress_user_id'),
                'wordpress_roles': wp_roles,
                'suggested_role': loaf_role,
                'suggested_status': suggested_status,
                'has_subscription': has_subscription,
                'subscription_data': subscription_data,
                'payment_count': len(payment_records),
                'total_paid_cents': sum(p['amount_cents'] for p in payment_records),
                'user_data': transformed['user_data'],
                'custom_data': transformed['custom_data'],
                'newsletter_prefs': transformed['newsletter_prefs'],
                'warnings': transformed['warnings'],
                'errors': transformed['errors'],
            }

            result['users']['preview'].append(preview_row)
            total_warnings += len(transformed['warnings'])
            total_errors += len(transformed['errors'])

            # A row is importable only if it produced no errors
            if not transformed['errors']:
                result['users']['valid'] += 1

        result['users']['warnings'] = total_warnings
        result['users']['errors'] = total_errors

        # Calculate unmatched members (member rows whose email never
        # appeared in the users CSV)
        user_emails = {p['email'] for p in result['users']['preview'] if p['email']}
        result['members']['unmatched'] = len(set(members_data.keys()) - user_emails)

        # Summary stats
        result['summary']['total_users'] = result['users']['total']
        result['summary']['importable_users'] = result['users']['valid']
        result['summary']['duplicate_emails'] = len(seen_emails & existing_emails)
        result['summary']['users_with_subscriptions'] = result['members']['matched']
        result['summary']['users_with_payments'] = result['payments']['matched']
        result['summary']['total_payment_amount'] = result['payments']['total_amount_cents']

    except Exception as e:
        logger.error(f"Error analyzing users CSV: {str(e)}")
        result['users']['error'] = str(e)
        raise

    return result
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CSV Analysis and Preview Generation
|
||||
# ============================================================================
|
||||
@@ -344,8 +1066,6 @@ def analyze_csv(file_path: str, existing_emails: Optional[set] = None) -> Dict:
|
||||
}
|
||||
}
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
# Read CSV with pandas
|
||||
df = pd.read_csv(file_path)
|
||||
|
||||
@@ -521,11 +1241,4 @@ def format_preview_for_display(preview_data: List[Dict], page: int = 1, page_siz
|
||||
# Module Initialization
|
||||
# ============================================================================
|
||||
|
||||
# Import pandas for CSV processing
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
logger.error("pandas library not found. Please install: pip install pandas")
|
||||
raise
|
||||
|
||||
logger.info("WordPress parser module loaded successfully")
|
||||
|
||||
Reference in New Issue
Block a user