Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a807d97345 | |||
| 7d61eddcef | |||
| a5fc42b353 | |||
| 37b1ab75df | |||
| f915976cb3 | |||
| 9c5aafc57b | |||
| 3755a71ed8 | |||
| b2293a5588 | |||
| 9f29bf05d8 | |||
| b44d55919e | |||
| 1a6341a94c | |||
| 727cbf4b5c | |||
| 9c3f3c88b8 | |||
| 849a6a32af | |||
| 69b8185414 | |||
| f5f8ca8dc6 | |||
| 661a4cbb7c | |||
| a01a8b9915 | |||
| e126cb988c | |||
| fd988241a1 | |||
| c28eddca67 | |||
| e20542ccdc | |||
| b3f1f5f789 | |||
| 1da045f73f |
Binary file not shown.
Binary file not shown.
Binary file not shown.
1133
import_templates.py
1133
import_templates.py
File diff suppressed because it is too large
Load Diff
129
r2_storage.py
129
r2_storage.py
@@ -50,14 +50,6 @@ class R2Storage:
|
|||||||
'image/svg+xml': ['.svg']
|
'image/svg+xml': ['.svg']
|
||||||
}
|
}
|
||||||
|
|
||||||
# CSV files for imports
|
|
||||||
# MIME types accepted for CSV imports; every one maps to the '.csv' extension.
# Each key gets its own list object, exactly as a hand-written literal would.
ALLOWED_CSV_TYPES = {
    mime_type: ['.csv']
    for mime_type in (
        'text/csv',
        'text/plain',                # some systems report CSV as text/plain
        'application/csv',
        'application/vnd.ms-excel',  # old Excel type sometimes used for CSV
    )
}
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
"""Initialize R2 client with credentials from environment"""
|
"""Initialize R2 client with credentials from environment"""
|
||||||
self.account_id = os.getenv('R2_ACCOUNT_ID')
|
self.account_id = os.getenv('R2_ACCOUNT_ID')
|
||||||
@@ -248,127 +240,6 @@ class R2Storage:
|
|||||||
except ClientError:
|
except ClientError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def upload_bytes(
    self,
    content: bytes,
    folder: str,
    filename: str,
    content_type: str = 'text/csv'
) -> tuple[str, str, int]:
    """
    Upload raw bytes to R2 storage (useful for CSV imports)

    The object is stored under a freshly generated UUID name; only the
    lower-cased extension of ``filename`` is kept ('.csv' when it has none).

    Args:
        content: Raw bytes to upload
        folder: Folder path in R2 (e.g., 'imports/job-id'); leading and
            trailing slashes are ignored
        filename: Original filename (used only for its extension)
        content_type: MIME type of the content

    Returns:
        tuple: (public_url, object_key, file_size_bytes)

    Raises:
        HTTPException: If upload fails (status 500)
    """
    try:
        file_size = len(content)

        # Generate unique filename preserving original extension
        file_extension = Path(filename).suffix.lower() or '.csv'
        unique_filename = f"{uuid.uuid4()}{file_extension}"

        # Normalise the prefix so 'imports/job/' or '' cannot produce keys
        # that contain '//' or start with '/'
        prefix = folder.strip('/')
        object_key = f"{prefix}/{unique_filename}" if prefix else unique_filename

        # Upload to R2
        self.client.put_object(
            Bucket=self.bucket_name,
            Key=object_key,
            Body=content,
            ContentType=content_type,
            ContentLength=file_size
        )

        # Generate public URL
        public_url = self.get_public_url(object_key)

        return public_url, object_key, file_size

    except ClientError as e:
        # Chain the cause so the original storage error stays in the traceback
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload to R2: {str(e)}"
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Upload error: {str(e)}"
        ) from e
|
|
||||||
|
|
||||||
async def download_file(self, object_key: str) -> bytes:
    """
    Download a file from R2 storage

    Args:
        object_key: The S3 object key (path) of the file

    Returns:
        bytes: File content

    Raises:
        HTTPException: 404 if the key does not exist, 500 for any other failure
    """
    try:
        response = self.client.get_object(
            Bucket=self.bucket_name,
            Key=object_key
        )
        body = response['Body']
        try:
            return body.read()
        finally:
            # Close the stream explicitly so the underlying HTTP connection
            # is released even when read() fails part-way through
            body.close()

    except ClientError as e:
        # .get() keeps a malformed error payload from raising KeyError here
        error_code = e.response.get('Error', {}).get('Code')
        if error_code == 'NoSuchKey':
            raise HTTPException(status_code=404, detail="File not found in storage") from e
        raise HTTPException(
            status_code=500,
            detail=f"Failed to download file from R2: {str(e)}"
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Download error: {str(e)}"
        ) from e
|
|
||||||
|
|
||||||
async def delete_multiple(self, object_keys: list[str]) -> bool:
    """
    Delete multiple files from R2 storage

    Empty/None keys are ignored. Keys are sent in batches because a single
    delete_objects request accepts at most 1000 keys.

    Args:
        object_keys: List of S3 object keys to delete

    Returns:
        bool: True if successful (also when there was nothing to delete)

    Raises:
        HTTPException: If deletion fails, including when the service reports
            per-key errors in an otherwise successful response
    """
    if not object_keys:
        return True

    objects = [{'Key': key} for key in object_keys if key]
    batch_size = 1000  # R2/S3 delete_objects accepts up to 1000 keys at once

    try:
        for start in range(0, len(objects), batch_size):
            response = self.client.delete_objects(
                Bucket=self.bucket_name,
                Delete={'Objects': objects[start:start + batch_size]}
            )
            # delete_objects does not raise for individual keys; failures are
            # listed under 'Errors' in the response body
            failed = (response or {}).get('Errors')
            if failed:
                first = failed[0]
                raise HTTPException(
                    status_code=500,
                    detail=(
                        f"Failed to delete files from R2: {len(failed)} object(s) "
                        f"could not be deleted (first: {first.get('Key')}: {first.get('Message')})"
                    )
                )
        return True

    except ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete files from R2: {str(e)}"
        ) from e
|
|
||||||
|
|
||||||
|
|
||||||
# Singleton instance
|
# Singleton instance
|
||||||
_r2_storage = None
|
_r2_storage = None
|
||||||
|
|||||||
@@ -10,127 +10,21 @@ Key Features:
|
|||||||
- Validate and standardize user data (DOB, phone numbers)
|
- Validate and standardize user data (DOB, phone numbers)
|
||||||
- Generate smart status suggestions based on approval and subscription data
|
- Generate smart status suggestions based on approval and subscription data
|
||||||
- Comprehensive data quality analysis and error reporting
|
- Comprehensive data quality analysis and error reporting
|
||||||
- Multi-file import support (Users, Members, Payments CSVs)
|
|
||||||
- Field mapping based on Meta Name Reference document
|
|
||||||
|
|
||||||
Author: Claude Code
|
Author: Claude Code
|
||||||
Date: 2025-12-24
|
Date: 2025-12-24
|
||||||
Updated: 2026-02-03 - Added comprehensive multi-file import support
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List, Optional, Tuple, Any
|
from typing import Dict, List, Optional, Tuple
|
||||||
import phpserialize
|
import phpserialize
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Meta Name Reference Field Mapping (from client's WordPress export)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Maps WordPress meta names to our database fields
|
|
||||||
# Format: 'wordpress_meta_name': ('db_field', 'field_type', 'parser_function')
|
|
||||||
META_FIELD_MAPPING = {
    # --- Core user profile -------------------------------------------------
    'first_name':    ('first_name', 'string', None),
    'last_name':     ('last_name', 'string', None),
    'user_email':    ('email', 'string', 'lowercase'),
    'user_login':    ('username', 'string', None),  # For reference only
    'address':       ('address', 'string', None),
    'city':          ('city', 'string', None),
    'state':         ('state', 'string', None),
    'zipcode':       ('zipcode', 'string', None),
    'cell_phone':    ('phone', 'string', 'phone'),
    'date_of_birth': ('date_of_birth', 'date', 'date_mmddyyyy'),

    # --- Partner -----------------------------------------------------------
    'partner_first_name':               ('partner_first_name', 'string', None),
    'partner_last_name':                ('partner_last_name', 'string', None),
    'partner_membership_status':        ('partner_is_member', 'boolean', 'yes_no'),
    'partner_membership_consideration': ('partner_plan_to_become_member', 'boolean', 'yes_no'),

    # --- Newsletter --------------------------------------------------------
    'newsletter_consent':   ('newsletter_subscribed', 'boolean', 'yes_no'),
    'newsletter_checklist': ('newsletter_preferences', 'multi_value', 'newsletter_checklist'),

    # --- Referral / lead sources -------------------------------------------
    'member_referral': ('referred_by_member_name', 'string', None),
    'referral_source': ('lead_sources', 'multi_value', 'lead_sources'),

    # --- Volunteering ------------------------------------------------------
    'volunteer_checklist': ('volunteer_interests', 'multi_value', 'volunteer_checklist'),

    # --- Scholarship -------------------------------------------------------
    'scholarship_request': ('scholarship_requested', 'boolean', 'yes_no'),
    'scholarship_reason':  ('scholarship_reason', 'string', None),

    # --- Member directory --------------------------------------------------
    'members_directory_filter': ('show_in_directory', 'boolean', 'yes_no'),
    # 'custom' entries carry a dotted path into the custom_registration_data JSON
    'md_display_name': ('custom_registration_data.directory_display_name', 'custom', None),
    'md_email':        ('directory_email', 'string', None),
    'description':     ('directory_bio', 'string', None),
    'md_adress':       ('directory_address', 'string', None),  # sic: key is misspelled in WordPress
    'md_phone':        ('directory_phone', 'string', None),
    'md_dob':          ('directory_dob', 'date', 'date_mmddyyyy'),
    'md_partner_name': ('directory_partner_name', 'string', None),
    'md_avatar':       ('profile_photo_url', 'string', None),

    # --- Account metadata --------------------------------------------------
    'member_since':    ('member_since', 'date', 'date_various'),
    'user_registered': ('wordpress_registered_date', 'datetime', 'datetime_mysql'),
    'ID':              ('wordpress_user_id', 'integer', None),

    # --- Stripe (carried over from WordPress) ------------------------------
    'pms_stripe_customer_id': ('stripe_customer_id', 'string', None),
}
|
|
||||||
|
|
||||||
# Newsletter checklist option mapping
|
|
||||||
NEWSLETTER_CHECKLIST_OPTIONS = {
    # Short option names
    'name':        'newsletter_publish_name',
    'photo':       'newsletter_publish_photo',
    'birthday':    'newsletter_publish_birthday',
    'none':        'newsletter_publish_none',
    # Longer labels as WordPress sometimes stores them
    'my name':     'newsletter_publish_name',
    'my photo':    'newsletter_publish_photo',
    'my birthday': 'newsletter_publish_birthday',
}
|
|
||||||
|
|
||||||
# Volunteer interests mapping (WordPress values to our format)
|
|
||||||
VOLUNTEER_INTERESTS_MAP = {
    # Bare keywords
    'events':         'Events',
    'fundraising':    'Fundraising',
    'communications': 'Communications',
    'membership':     'Membership',
    'board':          'Board of Directors',
    'other':          'Other',
    # Full phrases as WordPress sometimes stores them
    'help with events':         'Events',
    'help with fundraising':    'Fundraising',
    'help with communications': 'Communications',
    'help with membership':     'Membership',
    'serve on the board':       'Board of Directors',
}
|
|
||||||
|
|
||||||
# Lead sources mapping
|
|
||||||
# Keys are lower-case fragments looked up by substring; keep the insertion
# order, since a consumer may stop at the first key that matches.
LEAD_SOURCES_MAP = {
    'current member':    'Current member',
    'friend':            'Friend',
    'outsmart magazine': 'OutSmart Magazine',
    'outsmart':          'OutSmart Magazine',
    'search engine':     'Search engine (Google etc.)',
    'google':            'Search engine (Google etc.)',
    'known about loaf':  "I've known about LOAF for a long time",
    'long time':         "I've known about LOAF for a long time",
    'other':             'Other',
}
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# WordPress Role Mapping Configuration
|
# WordPress Role Mapping Configuration
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -389,622 +283,6 @@ def validate_dob(dob_str: str) -> Tuple[Optional[datetime], Optional[str]]:
|
|||||||
return None, f'Invalid date format: {dob_str} (expected MM/DD/YYYY)'
|
return None, f'Invalid date format: {dob_str} (expected MM/DD/YYYY)'
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Enhanced Field Parsers for Meta Name Reference
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
def parse_boolean_yes_no(value: Any) -> bool:
    """
    Parse yes/no style boolean values from WordPress.

    Truthy spellings (case-insensitive, surrounding whitespace ignored):
    yes, true, 1, checked, on, y. Everything else, including None and NaN,
    is False.

    Args:
        value: Raw cell value (string, number, bool, None or NaN)

    Returns:
        True only for one of the truthy spellings above
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return False

    # A 0/1 column that contains blanks is read by pandas as float, so a
    # checked box arrives as 1.0; str(1.0) == '1.0' would not match '1'
    if isinstance(value, float) and value.is_integer():
        value = int(value)

    str_val = str(value).lower().strip()
    return str_val in ('yes', 'true', '1', 'checked', 'on', 'y')
|
|
||||||
|
|
||||||
|
|
||||||
def parse_date_various(date_str: Any) -> Optional[datetime]:
    """
    Parse dates in various formats commonly found in WordPress exports.

    Formats are tried in the order listed below and the first one that parses
    to a year between 1900 and next year wins. An ambiguous value such as
    03/04/2020 is therefore always read as US month/day; the EU pattern only
    applies when the US one fails (e.g. 15/01/2020).

    Handles:
    - MM/DD/YYYY (US format)
    - YYYY-MM-DD (ISO format)
    - DD/MM/YYYY (EU format - attempted if US fails)
    - Month DD, YYYY (e.g., "January 15, 2020" or "Jan 15, 2020")
    - YYYY-MM-DD HH:MM:SS (MySQL datetime)
    - Month/year only: MM/YYYY, MM-YYYY, Mon-YYYY, Month-YYYY (day defaults to 1)

    Args:
        date_str: Raw cell value; None, NaN, '' and 'nan' are treated as missing

    Returns:
        Naive datetime, or None when the value is missing or unparseable
    """
    if date_str is None or (isinstance(date_str, float) and pd.isna(date_str)):
        return None

    date_str = str(date_str).strip()
    if not date_str or date_str.lower() == 'nan':
        return None

    formats = (
        '%m/%d/%Y',           # US: 01/15/2020
        '%Y-%m-%d',           # ISO: 2020-01-15
        '%d/%m/%Y',           # EU: 15/01/2020
        '%B %d, %Y',          # Full: January 15, 2020
        '%b %d, %Y',          # Short: Jan 15, 2020
        '%Y-%m-%d %H:%M:%S',  # MySQL datetime
        '%m/%Y',              # Month/Year: 01/2020
        '%m-%Y',              # Month-Year: 01-2020
        '%b-%Y',              # Short month-Year: Jan-2020
        '%B-%Y',              # Full month-Year: January-2020
    )

    # Loop-invariant: evaluate the clock once instead of once per format
    max_year = datetime.now().year + 1

    for fmt in formats:
        try:
            parsed = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        # Validate year range; an out-of-range hit keeps trying later formats
        if 1900 <= parsed.year <= max_year:
            return parsed

    # Very short strings are unlikely to be dates, so they are not logged
    if len(date_str) > 3:
        logger.debug("Could not parse date: %s", date_str)
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def parse_datetime_mysql(dt_str: Any) -> Optional[datetime]:
    """
    Parse a MySQL datetime string (YYYY-MM-DD HH:MM:SS).

    Values that do not match that exact layout are handed to
    parse_date_various as a fallback. None and NaN yield None.
    """
    is_missing = dt_str is None or (isinstance(dt_str, float) and pd.isna(dt_str))
    if is_missing:
        return None

    try:
        parsed = datetime.strptime(str(dt_str).strip(), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Not the strict MySQL layout; let the multi-format parser try
        parsed = parse_date_various(dt_str)
    return parsed
|
|
||||||
|
|
||||||
|
|
||||||
def parse_newsletter_checklist(value: Any) -> Dict[str, bool]:
    """
    Parse newsletter checklist multi-value field.

    WordPress stores this as comma-separated or PHP serialized values.
    An option counts as selected when any NEWSLETTER_CHECKLIST_OPTIONS key
    occurs as a substring of an entry (case-insensitive).

    Args:
        value: Raw cell value; None, NaN, '' and 'nan' mean "nothing selected"

    Returns:
        Dict with the four newsletter_publish_* flags, all False by default
    """
    result = {
        'newsletter_publish_name': False,
        'newsletter_publish_photo': False,
        'newsletter_publish_birthday': False,
        'newsletter_publish_none': False,
    }

    if value is None or (isinstance(value, float) and pd.isna(value)):
        return result

    raw = str(value).strip()
    str_val = raw.lower()
    if not str_val or str_val == 'nan':
        return result

    # Try PHP serialized first
    if raw.startswith('a:'):
        try:
            # Unserialize the original text: lowercasing first corrupts
            # case-sensitive tokens (e.g. N; for null) and can change the
            # byte length that each s:<len>: prefix declares
            parsed = phpserialize.loads(raw.encode('utf-8'))
        except Exception as exc:
            # Best effort: malformed payloads fall through to the substring
            # scan below instead of failing the whole row
            logger.debug("Could not unserialize newsletter checklist: %s", exc)
        else:
            if isinstance(parsed, dict):
                # List-style arrays keep the labels in the values (keys are
                # just 0, 1, ...); keyed arrays keep them in the keys
                for key, item_value in parsed.items():
                    for entry in (key, item_value):
                        if isinstance(entry, bytes):
                            entry = entry.decode('utf-8', errors='replace')
                        if not isinstance(entry, str):
                            continue
                        entry_lower = entry.lower()
                        for match_key, field in NEWSLETTER_CHECKLIST_OPTIONS.items():
                            if match_key in entry_lower:
                                result[field] = True
                return result

    # Try comma-separated values
    for item in (part.strip() for part in str_val.split(',')):
        for match_key, field in NEWSLETTER_CHECKLIST_OPTIONS.items():
            if match_key in item:
                result[field] = True

    return result
|
|
||||||
|
|
||||||
|
|
||||||
def parse_volunteer_checklist(value: Any) -> List[str]:
    """
    Parse volunteer interests checklist.

    Accepts a comma-separated string or a PHP serialized array. A label is
    selected when any VOLUNTEER_INTERESTS_MAP key occurs as a substring of an
    entry (case-insensitive).

    Args:
        value: Raw cell value; None, NaN, '' and 'nan' mean "nothing selected"

    Returns:
        List of standardized volunteer interest labels, without duplicates,
        in the order they were first matched
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    raw = str(value).strip()
    str_val = raw.lower()
    if not str_val or str_val == 'nan':
        return []

    interests = []

    # Try PHP serialized first
    if raw.startswith('a:'):
        try:
            # Unserialize the original text: lowercasing first corrupts
            # case-sensitive tokens (e.g. N; for null) and can change the
            # byte length that each s:<len>: prefix declares
            parsed = phpserialize.loads(raw.encode('utf-8'))
        except Exception as exc:
            # Best effort: malformed payloads fall through to the substring
            # scan below instead of failing the whole row
            logger.debug("Could not unserialize volunteer checklist: %s", exc)
        else:
            if isinstance(parsed, dict):
                # List-style arrays keep the labels in the values (keys are
                # just 0, 1, ...); keyed arrays keep them in the keys
                for key, item_value in parsed.items():
                    for entry in (key, item_value):
                        if isinstance(entry, bytes):
                            entry = entry.decode('utf-8', errors='replace')
                        if not isinstance(entry, str):
                            continue
                        entry_lower = entry.lower()
                        for match_key, label in VOLUNTEER_INTERESTS_MAP.items():
                            if match_key in entry_lower and label not in interests:
                                interests.append(label)
                return interests

    # Try comma-separated values
    for item in (part.strip() for part in str_val.split(',')):
        for match_key, label in VOLUNTEER_INTERESTS_MAP.items():
            if match_key in item and label not in interests:
                interests.append(label)

    return interests
|
|
||||||
|
|
||||||
|
|
||||||
def parse_lead_sources(value: Any) -> List[str]:
    """
    Parse referral/lead sources field.

    Accepts a comma-separated string or a PHP serialized array and maps each
    entry onto LEAD_SOURCES_MAP labels by case-insensitive substring match.
    In the comma-separated form, a non-empty item that matches no known key
    is reported as 'Other' (the original text is not kept).

    Args:
        value: Raw cell value; None, NaN, '' and 'nan' mean "no sources"

    Returns:
        List of standardized lead source labels, without duplicates, in the
        order they were first matched
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    raw = str(value).strip()
    str_val = raw.lower()
    if not str_val or str_val == 'nan':
        return []

    sources = []

    # Try PHP serialized first
    if raw.startswith('a:'):
        try:
            # Unserialize the original text: lowercasing first corrupts
            # case-sensitive tokens (e.g. N; for null) and can change the
            # byte length that each s:<len>: prefix declares
            parsed = phpserialize.loads(raw.encode('utf-8'))
        except Exception as exc:
            # Best effort: malformed payloads fall through to the substring
            # scan below instead of failing the whole row
            logger.debug("Could not unserialize lead sources: %s", exc)
        else:
            if isinstance(parsed, dict):
                # List-style arrays keep the labels in the values (keys are
                # just 0, 1, ...); keyed arrays keep them in the keys
                for key, item_value in parsed.items():
                    for entry in (key, item_value):
                        if isinstance(entry, bytes):
                            entry = entry.decode('utf-8', errors='replace')
                        if not isinstance(entry, str):
                            continue
                        entry_lower = entry.lower()
                        for match_key, label in LEAD_SOURCES_MAP.items():
                            if match_key in entry_lower and label not in sources:
                                sources.append(label)
                return sources

    # Try comma-separated values
    for item in (part.strip() for part in str_val.split(',')):
        if not item:
            continue
        matched = False
        for match_key, label in LEAD_SOURCES_MAP.items():
            if match_key in item:
                # Recognised even when the label was already collected, so a
                # repeated source is not misreported as 'Other'
                matched = True
                if label not in sources:
                    sources.append(label)
                    break
        # Unknown source: record a single 'Other'
        if not matched and 'Other' not in sources:
            sources.append('Other')

    return sources
|
|
||||||
|
|
||||||
|
|
||||||
def transform_csv_row_to_user_data(row: Dict[str, Any], existing_emails: Optional[set] = None) -> Dict[str, Any]:
    """
    Transform a CSV row to user data dictionary using Meta Name Reference mapping.

    Every entry of META_FIELD_MAPPING is looked up in the row; missing (None/NaN)
    cells are skipped. A cell that fails to parse produces a warning and is
    left out rather than aborting the row.

    Args:
        row: Dictionary of CSV column values
        existing_emails: Set of emails already in database (for duplicate check).
            The row's email is lower-cased before the lookup, so the set is
            presumably expected to hold lower-case addresses - verify against caller.

    Returns:
        Dictionary with:
        - user_data: Fields that map to User model
        - custom_data: Fields for custom_registration_data JSON
        - newsletter_prefs: Newsletter preference booleans
        - warnings: List of warning messages
        - errors: List of error messages
    """
    user_data = {}
    custom_data = {}
    newsletter_prefs = {}
    warnings = []
    errors = []

    # Process each mapped field
    for csv_field, (db_field, field_type, parser) in META_FIELD_MAPPING.items():
        value = row.get(csv_field)

        # Skip if no value
        if value is None or (isinstance(value, float) and pd.isna(value)):
            continue

        try:
            # Parse based on field type
            if field_type == 'string':
                if parser == 'lowercase':
                    parsed_value = str(value).strip().lower()
                elif parser == 'phone':
                    parsed_value = standardize_phone(value)
                    # '0000000000' is treated as the "could not standardize" marker
                    if parsed_value == '0000000000':
                        warnings.append(f'Invalid phone: {value}')
                else:
                    parsed_value = str(value).strip() if value else None

            elif field_type == 'integer':
                if not value:
                    parsed_value = None
                else:
                    try:
                        parsed_value = int(value)
                    except (TypeError, ValueError):
                        # Float-formatted text such as '123.0' (typical when a
                        # numeric column was exported as float) is still an integer
                        parsed_value = int(float(value))

            elif field_type == 'boolean':
                parsed_value = parse_boolean_yes_no(value)

            elif field_type == 'date':
                if parser == 'date_mmddyyyy':
                    parsed_value, warning = validate_dob(value)
                    if warning:
                        warnings.append(warning)
                else:
                    parsed_value = parse_date_various(value)

            elif field_type == 'datetime':
                parsed_value = parse_datetime_mysql(value)

            elif field_type == 'multi_value':
                if parser == 'newsletter_checklist':
                    # Returned under its own key, not inside user_data
                    newsletter_prefs = parse_newsletter_checklist(value)
                    continue
                elif parser == 'volunteer_checklist':
                    parsed_value = parse_volunteer_checklist(value)
                elif parser == 'lead_sources':
                    parsed_value = parse_lead_sources(value)
                else:
                    parsed_value = [str(value)]

            elif field_type == 'custom':
                # Store in custom_registration_data
                custom_field = db_field.replace('custom_registration_data.', '')
                custom_data[custom_field] = str(value).strip() if value else None
                continue

            else:
                parsed_value = value

            # Store in appropriate location
            if parsed_value is not None:
                user_data[db_field] = parsed_value

        except Exception as e:
            # One bad cell must not discard the rest of the row
            warnings.append(f'Error parsing {csv_field}: {str(e)}')

    # Check for required fields
    if not user_data.get('email'):
        errors.append('Missing email address')
    elif existing_emails and user_data['email'] in existing_emails:
        errors.append('Email already exists in database')

    if not user_data.get('first_name'):
        warnings.append('Missing first name')

    if not user_data.get('last_name'):
        warnings.append('Missing last name')

    return {
        'user_data': user_data,
        'custom_data': custom_data,
        'newsletter_prefs': newsletter_prefs,
        'warnings': warnings,
        'errors': errors
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Members CSV Parser (Subscription Data)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
def parse_members_csv(file_path: str) -> Dict[str, Any]:
    """
    Parse WordPress PMS Members export CSV for subscription data.

    Rows without an email are skipped. When an email appears more than once,
    the last row wins. Blank cells are returned as None rather than NaN.

    Status mapping: active -> active; expired/abandoned -> expired;
    canceled/cancelled -> cancelled; anything else defaults to active.

    Args:
        file_path: Path to pms-export-members CSV file

    Returns:
        Dictionary mapping user_email (lower-cased) to subscription data

    Raises:
        Exception: Any read/parse error is logged and re-raised unchanged
    """

    def _cell(row, column):
        """Return the cell value, or None if the column is absent or the cell is blank (NaN)."""
        cell = row.get(column)
        if cell is None or (isinstance(cell, float) and pd.isna(cell)):
            return None
        return cell

    def _text(row, column):
        """Return the cell as lower-cased, stripped text ('' when blank)."""
        cell = _cell(row, column)
        return '' if cell is None else str(cell).lower().strip()

    members_data = {}

    try:
        df = pd.read_csv(file_path)

        for _, row in df.iterrows():
            email = _text(row, 'user_email')
            if not email or email == 'nan':
                continue

            # Parse subscription dates
            start_date = parse_date_various(row.get('start_date'))
            expiration_date = parse_date_various(row.get('expiration_date'))

            # Map subscription status
            wp_status = _text(row, 'status')
            if wp_status == 'active':
                sub_status = 'active'
            elif wp_status in ('expired', 'abandoned'):
                sub_status = 'expired'
            elif wp_status in ('canceled', 'cancelled'):
                sub_status = 'cancelled'
            else:
                sub_status = 'active'  # Default

            # Parse payment gateway. A blank cell is NaN in pandas; going
            # through _text keeps it from becoming the literal method 'nan'
            payment_gateway = _text(row, 'payment_gateway')
            if 'stripe' in payment_gateway:
                payment_method = 'stripe'
            elif 'paypal' in payment_gateway:
                payment_method = 'paypal'
            elif payment_gateway in ('manual', 'admin', ''):
                payment_method = 'manual'
            else:
                payment_method = payment_gateway or 'manual'

            members_data[email] = {
                'subscription_plan_id': _cell(row, 'subscription_plan_id'),
                'subscription_plan_name': _cell(row, 'subscription_plan_name'),
                'start_date': start_date,
                'end_date': expiration_date,
                'status': sub_status,
                'payment_method': payment_method,
                'wordpress_user_id': _cell(row, 'user_id'),
                'billing_first_name': _cell(row, 'billing_first_name'),
                'billing_last_name': _cell(row, 'billing_last_name'),
                'billing_address': _cell(row, 'billing_address'),
                'billing_city': _cell(row, 'billing_city'),
                'billing_state': _cell(row, 'billing_state'),
                'billing_zip': _cell(row, 'billing_zip'),
                'card_last4': _cell(row, 'billing_card_last4'),
            }

    except Exception as e:
        logger.error("Error parsing members CSV: %s", e)
        raise

    return members_data
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Payments CSV Parser (Payment History)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
def parse_payments_csv(file_path: str) -> Dict[str, List[Dict]]:
    """
    Parse WordPress PMS Payments export CSV for payment history.

    Rows without an email are skipped. Amounts are converted to integer cents
    ('$' and ',' are stripped; unparseable amounts become 0). Blank cells are
    returned as None rather than NaN.

    Status mapping: completed -> completed; pending/processing -> pending;
    failed/refunded -> failed; anything else defaults to completed.

    Args:
        file_path: Path to pms-export-payments CSV file

    Returns:
        Dictionary mapping user_email (lower-cased) to list of payment records,
        in file order

    Raises:
        Exception: Any read/parse error is logged and re-raised unchanged
    """

    def _cell(row, column):
        """Return the cell value, or None if the column is absent or the cell is blank (NaN)."""
        cell = row.get(column)
        if cell is None or (isinstance(cell, float) and pd.isna(cell)):
            return None
        return cell

    payments_data = {}

    try:
        df = pd.read_csv(file_path)

        for _, row in df.iterrows():
            email = str(row.get('user_email', '')).strip().lower()
            if not email or email == 'nan':
                continue

            # Parse payment date
            payment_date = parse_date_various(row.get('date'))

            # Parse amount (convert to cents)
            amount_str = str(row.get('amount', '0')).replace('$', '').replace(',', '').strip()
            try:
                # round(), not int(): 19.99 * 100 is 1998.9999999999998 in
                # binary floating point and truncation would lose a cent
                amount_cents = round(float(amount_str) * 100)
            except (ValueError, TypeError, OverflowError):
                # Blank (NaN), non-numeric or infinite amounts count as zero
                amount_cents = 0

            # Map payment status
            wp_status = str(row.get('status', '')).lower().strip()
            if wp_status == 'completed':
                payment_status = 'completed'
            elif wp_status in ('pending', 'processing'):
                payment_status = 'pending'
            elif wp_status in ('failed', 'refunded'):
                payment_status = 'failed'
            else:
                payment_status = 'completed'  # Default for historical data

            payment_record = {
                'payment_id': _cell(row, 'payment_id'),
                'amount_cents': amount_cents,
                'status': payment_status,
                'date': payment_date,
                'payment_gateway': _cell(row, 'payment_gateway'),
                'transaction_id': _cell(row, 'transaction_id'),
                'profile_id': _cell(row, 'profile_id'),
                'subscription_plan_id': _cell(row, 'subscription_plan_id'),
                'wordpress_user_id': _cell(row, 'user_id'),
            }

            payments_data.setdefault(email, []).append(payment_record)

    except Exception as e:
        logger.error("Error parsing payments CSV: %s", e)
        raise

    return payments_data
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Comprehensive Import Analysis
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
def analyze_comprehensive_import(
    users_csv_path: str,
    members_csv_path: Optional[str] = None,
    payments_csv_path: Optional[str] = None,
    existing_emails: Optional[set] = None
) -> Dict[str, Any]:
    """
    Analyze all CSV files for comprehensive import with cross-referencing.

    The members and payments files are parsed first (both optional); a failure
    in either is recorded under ``result[<section>]['error']`` and the analysis
    continues without that data. The users file is then read row by row, each
    row is transformed, checked for duplicates within the CSV, and joined by
    lower-cased email against the members/payments lookups.

    Args:
        users_csv_path: Path to WordPress users export CSV (required)
        members_csv_path: Path to PMS members CSV (optional)
        payments_csv_path: Path to PMS payments CSV (optional)
        existing_emails: Set of emails already in database

    Returns:
        Comprehensive analysis with preview data for all files. Keys:
        ``users`` (totals plus one ``preview`` entry per CSV row),
        ``members``, ``payments`` and ``summary``.

    Raises:
        Exception: Any error raised while reading or analyzing the users CSV
            is logged and re-raised.
    """
    if existing_emails is None:
        existing_emails = set()

    result = {
        'users': {'total': 0, 'valid': 0, 'warnings': 0, 'errors': 0, 'preview': []},
        'members': {'total': 0, 'matched': 0, 'unmatched': 0, 'data': {}},
        'payments': {'total': 0, 'matched': 0, 'total_amount_cents': 0, 'data': {}},
        'summary': {
            'total_users': 0,
            'importable_users': 0,
            'duplicate_emails': 0,
            'users_with_subscriptions': 0,
            'users_with_payments': 0,
            'total_payment_amount': 0,
        }
    }

    # Parse members CSV if provided (best effort: errors are reported, not raised)
    members_data = {}
    if members_csv_path:
        try:
            members_data = parse_members_csv(members_csv_path)
            result['members']['total'] = len(members_data)
            result['members']['data'] = members_data
        except Exception as e:
            result['members']['error'] = str(e)

    # Parse payments CSV if provided (best effort: errors are reported, not raised)
    payments_data = {}
    if payments_csv_path:
        try:
            payments_data = parse_payments_csv(payments_csv_path)
            result['payments']['total'] = sum(len(p) for p in payments_data.values())
            result['payments']['data'] = payments_data
            result['payments']['total_amount_cents'] = sum(
                sum(p['amount_cents'] for p in payments)
                for payments in payments_data.values()
            )
        except Exception as e:
            result['payments']['error'] = str(e)

    # Parse users CSV
    try:
        df = pd.read_csv(users_csv_path)
        result['users']['total'] = len(df)

        seen_emails = set()
        total_warnings = 0
        total_errors = 0

        for idx, row in df.iterrows():
            row_dict = row.to_dict()
            transformed = transform_csv_row_to_user_data(row_dict, existing_emails)
            user_data = transformed['user_data']

            # The email may be absent, None or NaN; only a real string can be
            # normalized. Anything else is treated as "no email".
            raw_email = user_data.get('email', '')
            email = raw_email.lower() if isinstance(raw_email, str) else ''

            # Check for CSV duplicates
            is_duplicate = bool(email) and email in seen_emails
            if is_duplicate:
                transformed['errors'].append('Duplicate email in CSV')
            elif email:
                seen_emails.add(email)

            # Cross-reference with members data. A repeated email is counted
            # once so that the matched counter reflects distinct users.
            subscription_data = members_data.get(email)
            if subscription_data and not is_duplicate:
                result['members']['matched'] += 1

            # Cross-reference with payments data (same once-per-email rule)
            payment_records = payments_data.get(email, [])
            if payment_records and not is_duplicate:
                result['payments']['matched'] += 1

            # Parse WordPress roles for role/status suggestion
            wp_capabilities = row.get('wp_capabilities', '')
            wp_roles = parse_php_serialized(wp_capabilities)
            loaf_role, role_status = map_wordpress_role(wp_roles)

            # Determine status
            approval_status = str(row.get('wppb_approval_status', '')).strip()
            has_subscription = 'pms_subscription_plan_63' in wp_roles or subscription_data is not None

            if role_status:
                suggested_status = role_status
            else:
                suggested_status = suggest_status(approval_status, has_subscription, loaf_role)

            date_of_birth = user_data.get('date_of_birth')

            # Build preview row
            preview_row = {
                'row_number': idx + 1,
                'email': email,
                'first_name': user_data.get('first_name', ''),
                'last_name': user_data.get('last_name', ''),
                'phone': user_data.get('phone', ''),
                'date_of_birth': date_of_birth.isoformat() if date_of_birth else None,
                'wordpress_user_id': user_data.get('wordpress_user_id'),
                'wordpress_roles': wp_roles,
                'suggested_role': loaf_role,
                'suggested_status': suggested_status,
                'has_subscription': has_subscription,
                'subscription_data': subscription_data,
                'payment_count': len(payment_records),
                'total_paid_cents': sum(p['amount_cents'] for p in payment_records),
                'user_data': user_data,
                'custom_data': transformed['custom_data'],
                'newsletter_prefs': transformed['newsletter_prefs'],
                'warnings': transformed['warnings'],
                'errors': transformed['errors'],
            }

            result['users']['preview'].append(preview_row)
            total_warnings += len(transformed['warnings'])
            total_errors += len(transformed['errors'])

            if not transformed['errors']:
                result['users']['valid'] += 1

        result['users']['warnings'] = total_warnings
        result['users']['errors'] = total_errors

        # Calculate unmatched members
        user_emails = {p['email'] for p in result['users']['preview'] if p['email']}
        result['members']['unmatched'] = len(set(members_data.keys()) - user_emails)

        # Summary stats
        result['summary']['total_users'] = result['users']['total']
        result['summary']['importable_users'] = result['users']['valid']
        result['summary']['duplicate_emails'] = len(seen_emails & existing_emails)
        result['summary']['users_with_subscriptions'] = result['members']['matched']
        result['summary']['users_with_payments'] = result['payments']['matched']
        result['summary']['total_payment_amount'] = result['payments']['total_amount_cents']

    except Exception as e:
        logger.error(f"Error analyzing users CSV: {str(e)}")
        result['users']['error'] = str(e)
        raise

    return result
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# CSV Analysis and Preview Generation
|
# CSV Analysis and Preview Generation
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -1066,6 +344,8 @@ def analyze_csv(file_path: str, existing_emails: Optional[set] = None) -> Dict:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
# Read CSV with pandas
|
# Read CSV with pandas
|
||||||
df = pd.read_csv(file_path)
|
df = pd.read_csv(file_path)
|
||||||
|
|
||||||
@@ -1241,4 +521,11 @@ def format_preview_for_display(preview_data: List[Dict], page: int = 1, page_siz
|
|||||||
# Module Initialization
|
# Module Initialization
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
# Import pandas for CSV processing
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
except ImportError:
|
||||||
|
logger.error("pandas library not found. Please install: pip install pandas")
|
||||||
|
raise
|
||||||
|
|
||||||
logger.info("WordPress parser module loaded successfully")
|
logger.info("WordPress parser module loaded successfully")
|
||||||
|
|||||||
Reference in New Issue
Block a user