Template-Based CSV Import System with R2 Storage
Solution: Updated backend/r2_storage.py:
- Added ALLOWED_CSV_TYPES for CSV file validation
- Added upload_bytes() method for uploading raw bytes to R2
- Added download_file() method for retrieving files from R2
- Added delete_multiple() method for bulk file deletion
Comprehensive upload endpoint now stores CSVs in R2:
r2_storage = get_r2_storage()
for file_type, (content, filename) in file_contents.items():
_, r2_key, _ = await r2_storage.upload_bytes(
content=content,
folder=f"imports/{job_id}",
filename=f"{file_type}_{filename}",
content_type='text/csv'
)
r2_keys[file_type] = r2_key
---
2. Stripe Transaction ID Tracking
Solution: Updated subscription and donation imports to capture Stripe metadata:
Subscription fields:
- stripe_subscription_id
- stripe_customer_id
- stripe_payment_intent_id
- stripe_invoice_id
- stripe_charge_id
- stripe_receipt_url
- card_last4, card_brand, payment_method
Donation fields:
- stripe_payment_intent_id
- stripe_charge_id
- stripe_receipt_url
- card_last4, card_brand
---
3. Fixed JSON Serialization Error
Problem: Object of type datetime is not JSON serializable when saving import metadata.
Solution: Added serialize_for_json() helper in backend/server.py:
def serialize_for_json(obj):
"""Recursively convert datetime objects to ISO strings for JSON serialization."""
if isinstance(obj, (datetime, date)):
return obj.isoformat()
elif isinstance(obj, dict):
return {k: serialize_for_json(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [serialize_for_json(item) for item in obj]
# ... handles other types
---
4. Fixed Route Ordering (401 Unauthorized)
Problem: /admin/import/comprehensive/upload returned 401 because FastAPI matched "comprehensive" as a {job_id} parameter.
Solution: Moved comprehensive import routes BEFORE generic {job_id} routes in backend/server.py:
# Correct order:
@app.post("/api/admin/import/comprehensive/upload") # Specific route FIRST
# ... other comprehensive routes ...
@app.get("/api/admin/import/{job_id}/preview") # Generic route AFTER
---
5. Improved Date Parsing
Solution: Added additional date formats to backend/wordpress_parser.py:
formats = [
'%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%B %d, %Y', '%b %d, %Y',
'%Y-%m-%d %H:%M:%S',
'%m/%Y', # Month/Year: 01/2020
'%m-%Y', # Month-Year: 01-2020
'%b-%Y', # Short month-Year: Jan-2020
'%B-%Y', # Full month-Year: January-2020
]
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
1133
import_templates.py
Normal file
1133
import_templates.py
Normal file
File diff suppressed because it is too large
Load Diff
129
r2_storage.py
129
r2_storage.py
@@ -50,6 +50,14 @@ class R2Storage:
|
||||
'image/svg+xml': ['.svg']
|
||||
}
|
||||
|
||||
# CSV files for imports.
# Accepted MIME type -> valid extension(s) for CSV upload validation;
# browsers and OSes disagree on the MIME type they report for .csv files,
# so several aliases are accepted.
ALLOWED_CSV_TYPES = {
    'text/csv': ['.csv'],
    'text/plain': ['.csv'],  # Some systems report CSV as text/plain
    'application/csv': ['.csv'],
    'application/vnd.ms-excel': ['.csv'],  # Old Excel type sometimes used for CSV
}
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize R2 client with credentials from environment"""
|
||||
self.account_id = os.getenv('R2_ACCOUNT_ID')
|
||||
@@ -240,6 +248,127 @@ class R2Storage:
|
||||
except ClientError:
|
||||
return False
|
||||
|
||||
async def upload_bytes(
    self,
    content: bytes,
    folder: str,
    filename: str,
    content_type: str = 'text/csv'
) -> tuple[str, str, int]:
    """Store raw in-memory bytes in R2 under a uniquely-named key.

    Useful for CSV imports where the payload is already in memory.

    Args:
        content: Raw bytes to upload.
        folder: Destination folder in the bucket (e.g. 'imports/job-id').
        filename: Original filename; only its extension is preserved.
        content_type: MIME type recorded on the stored object.

    Returns:
        tuple: (public_url, object_key, file_size_bytes)

    Raises:
        HTTPException: With status 500 if the upload fails.
    """
    try:
        size = len(content)

        # Key = <folder>/<uuid><ext>: the UUID avoids collisions while the
        # original extension (defaulting to .csv) is kept for readability.
        extension = Path(filename).suffix.lower() or '.csv'
        object_key = f"{folder}/{uuid.uuid4()}{extension}"

        self.client.put_object(
            Bucket=self.bucket_name,
            Key=object_key,
            Body=content,
            ContentType=content_type,
            ContentLength=size
        )

        return self.get_public_url(object_key), object_key, size

    except ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload to R2: {str(e)}"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Upload error: {str(e)}"
        )
|
||||
|
||||
async def download_file(self, object_key: str) -> bytes:
    """Fetch an object's full content from R2 storage.

    Args:
        object_key: The S3 object key (path) of the file.

    Returns:
        bytes: The file content.

    Raises:
        HTTPException: 404 when the key does not exist, 500 for any
            other storage or unexpected failure.
    """
    try:
        result = self.client.get_object(
            Bucket=self.bucket_name,
            Key=object_key
        )
        return result['Body'].read()

    except ClientError as e:
        # Distinguish a missing object from other storage failures.
        if e.response['Error']['Code'] == 'NoSuchKey':
            raise HTTPException(status_code=404, detail="File not found in storage")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to download file from R2: {str(e)}"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Download error: {str(e)}"
        )
|
||||
|
||||
async def delete_multiple(self, object_keys: list[str]) -> bool:
    """Delete multiple files from R2 storage.

    Falsy keys (None, '') are skipped; an empty input is a successful
    no-op. Keys are sent in batches of 1000, the maximum the S3/R2
    delete_objects API accepts per request (the previous version sent
    everything in one call and would fail on larger batches).

    Args:
        object_keys: List of S3 object keys to delete.

    Returns:
        bool: True if successful.

    Raises:
        HTTPException: With status 500 if deletion fails.
    """
    if not object_keys:
        return True

    try:
        targets = [{'Key': key} for key in object_keys if key]

        # delete_objects accepts at most 1000 keys per request, so chunk.
        for start in range(0, len(targets), 1000):
            self.client.delete_objects(
                Bucket=self.bucket_name,
                Delete={'Objects': targets[start:start + 1000]}
            )
        return True

    except ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete files from R2: {str(e)}"
        )
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_r2_storage = None
|
||||
|
||||
@@ -10,21 +10,127 @@ Key Features:
|
||||
- Validate and standardize user data (DOB, phone numbers)
|
||||
- Generate smart status suggestions based on approval and subscription data
|
||||
- Comprehensive data quality analysis and error reporting
|
||||
- Multi-file import support (Users, Members, Payments CSVs)
|
||||
- Field mapping based on Meta Name Reference document
|
||||
|
||||
Author: Claude Code
|
||||
Date: 2025-12-24
|
||||
Updated: 2026-02-03 - Added comprehensive multi-file import support
|
||||
"""
|
||||
|
||||
import csv
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
import phpserialize
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Meta Name Reference Field Mapping (from client's WordPress export)
|
||||
# ============================================================================
|
||||
|
||||
# Maps WordPress meta names to our database fields
|
||||
# Format: 'wordpress_meta_name': ('db_field', 'field_type', 'parser_function')
|
||||
META_FIELD_MAPPING = {
    # field_type selects the parsing branch in transform_csv_row_to_user_data();
    # the parser string selects the specific parser within that branch.

    # Basic user info
    'first_name': ('first_name', 'string', None),
    'last_name': ('last_name', 'string', None),
    'user_email': ('email', 'string', 'lowercase'),
    'user_login': ('username', 'string', None),  # For reference only
    'address': ('address', 'string', None),
    'city': ('city', 'string', None),
    'state': ('state', 'string', None),
    'zipcode': ('zipcode', 'string', None),
    'cell_phone': ('phone', 'string', 'phone'),
    'date_of_birth': ('date_of_birth', 'date', 'date_mmddyyyy'),

    # Partner info
    'partner_first_name': ('partner_first_name', 'string', None),
    'partner_last_name': ('partner_last_name', 'string', None),
    'partner_membership_status': ('partner_is_member', 'boolean', 'yes_no'),
    'partner_membership_consideration': ('partner_plan_to_become_member', 'boolean', 'yes_no'),

    # Newsletter preferences
    'newsletter_consent': ('newsletter_subscribed', 'boolean', 'yes_no'),
    'newsletter_checklist': ('newsletter_preferences', 'multi_value', 'newsletter_checklist'),

    # Referral and lead sources
    'member_referral': ('referred_by_member_name', 'string', None),
    'referral_source': ('lead_sources', 'multi_value', 'lead_sources'),

    # Volunteer interests
    'volunteer_checklist': ('volunteer_interests', 'multi_value', 'volunteer_checklist'),

    # Scholarship
    'scholarship_request': ('scholarship_requested', 'boolean', 'yes_no'),
    'scholarship_reason': ('scholarship_reason', 'string', None),

    # Directory settings
    'members_directory_filter': ('show_in_directory', 'boolean', 'yes_no'),
    # 'custom' type: stored inside the custom_registration_data JSON blob,
    # not as a top-level User field.
    'md_display_name': ('custom_registration_data.directory_display_name', 'custom', None),
    'md_email': ('directory_email', 'string', None),
    'description': ('directory_bio', 'string', None),
    'md_adress': ('directory_address', 'string', None),  # Note: typo in WordPress
    'md_phone': ('directory_phone', 'string', None),
    'md_dob': ('directory_dob', 'date', 'date_mmddyyyy'),
    'md_partner_name': ('directory_partner_name', 'string', None),
    'md_avatar': ('profile_photo_url', 'string', None),

    # Metadata
    'member_since': ('member_since', 'date', 'date_various'),
    'user_registered': ('wordpress_registered_date', 'datetime', 'datetime_mysql'),
    'ID': ('wordpress_user_id', 'integer', None),

    # Stripe info (from WordPress)
    'pms_stripe_customer_id': ('stripe_customer_id', 'string', None),
}

# Newsletter checklist option mapping: substring matched (case-insensitive)
# against raw checklist entries -> our newsletter_publish_* flag name.
NEWSLETTER_CHECKLIST_OPTIONS = {
    'name': 'newsletter_publish_name',
    'photo': 'newsletter_publish_photo',
    'birthday': 'newsletter_publish_birthday',
    'none': 'newsletter_publish_none',
    # Handle various WordPress stored formats
    'my name': 'newsletter_publish_name',
    'my photo': 'newsletter_publish_photo',
    'my birthday': 'newsletter_publish_birthday',
}

# Volunteer interests mapping (WordPress values to our format).
# Matched by substring, so the longer "help with ..." variants also hit
# their short keys; both map to the same label.
VOLUNTEER_INTERESTS_MAP = {
    'events': 'Events',
    'fundraising': 'Fundraising',
    'communications': 'Communications',
    'membership': 'Membership',
    'board': 'Board of Directors',
    'other': 'Other',
    # Handle various WordPress formats
    'help with events': 'Events',
    'help with fundraising': 'Fundraising',
    'help with communications': 'Communications',
    'help with membership': 'Membership',
    'serve on the board': 'Board of Directors',
}

# Lead sources mapping: substring matched against referral_source tokens;
# several synonyms collapse onto one canonical label.
LEAD_SOURCES_MAP = {
    'current member': 'Current member',
    'friend': 'Friend',
    'outsmart magazine': 'OutSmart Magazine',
    'outsmart': 'OutSmart Magazine',
    'search engine': 'Search engine (Google etc.)',
    'google': 'Search engine (Google etc.)',
    'known about loaf': "I've known about LOAF for a long time",
    'long time': "I've known about LOAF for a long time",
    'other': 'Other',
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# WordPress Role Mapping Configuration
|
||||
# ============================================================================
|
||||
@@ -283,6 +389,622 @@ def validate_dob(dob_str: str) -> Tuple[Optional[datetime], Optional[str]]:
|
||||
return None, f'Invalid date format: {dob_str} (expected MM/DD/YYYY)'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Enhanced Field Parsers for Meta Name Reference
|
||||
# ============================================================================
|
||||
|
||||
def parse_boolean_yes_no(value: Any) -> bool:
    """Interpret a WordPress-style truthy value as a boolean.

    Recognizes (case-insensitively): yes, true, 1, checked, on, y.
    None, NaN, and every other value are treated as False.
    """
    # Missing values (None or a pandas NaN float) are never truthy.
    if value is None:
        return False
    if isinstance(value, float) and pd.isna(value):
        return False

    normalized = str(value).lower().strip()
    return normalized in {'yes', 'true', '1', 'checked', 'on', 'y'}
|
||||
|
||||
|
||||
def parse_date_various(date_str: Any) -> Optional[datetime]:
    """Best-effort date parser for WordPress export values.

    Tries a fixed list of common formats (US, ISO, EU, long/short month
    names, MySQL datetimes, and month/year-only values) and returns the
    first match whose year falls in a plausible range; None otherwise.
    """
    if date_str is None:
        return None
    if isinstance(date_str, float) and pd.isna(date_str):
        return None

    text = str(date_str).strip()
    if not text or text.lower() == 'nan':
        return None

    known_formats = (
        '%m/%d/%Y',           # US: 01/15/2020
        '%Y-%m-%d',           # ISO: 2020-01-15
        '%d/%m/%Y',           # EU: 15/01/2020
        '%B %d, %Y',          # Full: January 15, 2020
        '%b %d, %Y',          # Short: Jan 15, 2020
        '%Y-%m-%d %H:%M:%S',  # MySQL datetime
        '%m/%Y',              # Month/Year: 01/2020
        '%m-%Y',              # Month-Year: 01-2020
        '%b-%Y',              # Short month-Year: Jan-2020
        '%B-%Y',              # Full month-Year: January-2020
    )

    max_year = datetime.now().year + 1
    for fmt in known_formats:
        try:
            candidate = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # Reject implausible years; a mismatch here just means we try
        # the next format rather than accepting a garbage parse.
        if 1900 <= candidate.year <= max_year:
            return candidate

    # Only log for values long enough to plausibly have been dates.
    if text and len(text) > 3:
        logger.debug(f"Could not parse date: {text}")
    return None
|
||||
|
||||
|
||||
def parse_datetime_mysql(dt_str: Any) -> Optional[datetime]:
    """Parse a MySQL-style datetime (YYYY-MM-DD HH:MM:SS).

    Values in any other shape are delegated to parse_date_various().
    """
    if dt_str is None:
        return None
    if isinstance(dt_str, float) and pd.isna(dt_str):
        return None

    try:
        return datetime.strptime(str(dt_str).strip(), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Not MySQL-shaped; fall back to the multi-format parser.
        return parse_date_various(dt_str)
|
||||
|
||||
|
||||
def parse_newsletter_checklist(value: Any) -> Dict[str, bool]:
    """Map a WordPress newsletter checklist value onto publish flags.

    The raw value may be a PHP-serialized array or a comma-separated
    string; entries are matched by substring against
    NEWSLETTER_CHECKLIST_OPTIONS.

    Returns a dict of newsletter_publish_* booleans (all False by default).
    """
    flags = {
        'newsletter_publish_name': False,
        'newsletter_publish_photo': False,
        'newsletter_publish_birthday': False,
        'newsletter_publish_none': False,
    }

    if value is None or (isinstance(value, float) and pd.isna(value)):
        return flags

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return flags

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, flag_name in NEWSLETTER_CHECKLIST_OPTIONS.items():
                        if needle in key:
                            flags[flag_name] = True
            return flags
        except Exception:
            pass

    # Otherwise treat the value as a comma-separated list.
    for token in (part.strip().lower() for part in text.split(',')):
        for needle, flag_name in NEWSLETTER_CHECKLIST_OPTIONS.items():
            if needle in token:
                flags[flag_name] = True

    return flags
|
||||
|
||||
|
||||
def parse_volunteer_checklist(value: Any) -> List[str]:
    """Extract standardized volunteer-interest labels from a raw value.

    Accepts PHP-serialized arrays or comma-separated strings; entries are
    matched by substring against VOLUNTEER_INTERESTS_MAP.

    Returns a de-duplicated list of labels (empty for missing input).
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return []

    labels: List[str] = []

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, label in VOLUNTEER_INTERESTS_MAP.items():
                        if needle in key and label not in labels:
                            labels.append(label)
            return labels
        except Exception:
            pass

    # Otherwise treat the value as a comma-separated list.
    for token in (part.strip().lower() for part in text.split(',')):
        for needle, label in VOLUNTEER_INTERESTS_MAP.items():
            if needle in token and label not in labels:
                labels.append(label)

    return labels
|
||||
|
||||
|
||||
def parse_lead_sources(value: Any) -> List[str]:
    """Extract standardized lead-source labels from a raw value.

    Accepts PHP-serialized arrays or comma-separated strings; entries are
    matched by substring against LEAD_SOURCES_MAP. In the comma-separated
    path, any token that records no new label is filed under 'Other'.
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return []

    text = str(value).lower().strip()
    if not text or text == 'nan':
        return []

    found: List[str] = []

    # PHP-serialized arrays start with 'a:'; if deserialization fails we
    # fall through to comma-separated parsing.
    if text.startswith('a:'):
        try:
            decoded = phpserialize.loads(text.encode('utf-8'))
            if isinstance(decoded, dict):
                for raw_key in decoded:
                    key = raw_key.decode('utf-8') if isinstance(raw_key, bytes) else str(raw_key)
                    key = key.lower()
                    for needle, label in LEAD_SOURCES_MAP.items():
                        if needle in key and label not in found:
                            found.append(label)
            return found
        except Exception:
            pass

    # Comma-separated fallback: the first map entry whose key appears in
    # the token (and whose label was not yet recorded) wins; a token that
    # records nothing — including a repeat of an already-seen source —
    # falls through to 'Other'.
    for token in (part.strip().lower() for part in text.split(',')):
        recorded = False
        for needle, label in LEAD_SOURCES_MAP.items():
            if needle in token and label not in found:
                found.append(label)
                recorded = True
                break
        if not recorded and token:
            found.append('Other')

    return found
|
||||
|
||||
|
||||
def transform_csv_row_to_user_data(row: Dict[str, Any], existing_emails: Optional[set] = None) -> Dict[str, Any]:
    """
    Transform a CSV row to user data dictionary using Meta Name Reference mapping.

    Each column listed in META_FIELD_MAPPING is parsed according to its
    declared field_type/parser; per-field parse failures are downgraded to
    warnings so one bad cell never rejects the whole row. Only a missing
    or already-existing email produces an error.

    Args:
        row: Dictionary of CSV column values
        existing_emails: Set of emails already in database (for duplicate check)

    Returns:
        Dictionary with:
        - user_data: Fields that map to User model
        - custom_data: Fields for custom_registration_data JSON
        - newsletter_prefs: Newsletter preference booleans
        - warnings: List of warning messages (non-fatal issues)
        - errors: List of error messages (row should not be imported)
    """
    user_data = {}
    custom_data = {}
    newsletter_prefs = {}
    warnings = []
    errors = []

    # Process each mapped field
    for csv_field, (db_field, field_type, parser) in META_FIELD_MAPPING.items():
        value = row.get(csv_field)

        # Skip if no value (None or a pandas NaN float)
        if value is None or (isinstance(value, float) and pd.isna(value)):
            continue

        try:
            # Parse based on field type
            if field_type == 'string':
                if parser == 'lowercase':
                    parsed_value = str(value).strip().lower()
                elif parser == 'phone':
                    parsed_value = standardize_phone(value)
                    # '0000000000' is standardize_phone's sentinel for an
                    # unusable number — keep it but warn.
                    if parsed_value == '0000000000':
                        warnings.append(f'Invalid phone: {value}')
                else:
                    parsed_value = str(value).strip() if value else None

            elif field_type == 'integer':
                parsed_value = int(value) if value else None

            elif field_type == 'boolean':
                parsed_value = parse_boolean_yes_no(value)

            elif field_type == 'date':
                if parser == 'date_mmddyyyy':
                    # validate_dob returns (datetime_or_None, warning_or_None)
                    parsed_value, warning = validate_dob(value)
                    if warning:
                        warnings.append(warning)
                else:
                    parsed_value = parse_date_various(value)

            elif field_type == 'datetime':
                parsed_value = parse_datetime_mysql(value)

            elif field_type == 'multi_value':
                if parser == 'newsletter_checklist':
                    # Returned as a dict of flags, collected separately
                    # from user_data.
                    newsletter_prefs = parse_newsletter_checklist(value)
                    continue  # Handled separately
                elif parser == 'volunteer_checklist':
                    parsed_value = parse_volunteer_checklist(value)
                elif parser == 'lead_sources':
                    parsed_value = parse_lead_sources(value)
                else:
                    parsed_value = [str(value)]

            elif field_type == 'custom':
                # Store in custom_registration_data (db_field is prefixed
                # with 'custom_registration_data.' in the mapping).
                custom_field = db_field.replace('custom_registration_data.', '')
                custom_data[custom_field] = str(value).strip() if value else None
                continue

            else:
                # Unknown field_type: pass the raw value through unchanged.
                parsed_value = value

            # Store in appropriate location
            if parsed_value is not None:
                user_data[db_field] = parsed_value

        except Exception as e:
            # Field-level failures are warnings, not errors, so one bad
            # cell does not block the rest of the row.
            warnings.append(f'Error parsing {csv_field}: {str(e)}')

    # Check for required fields
    if not user_data.get('email'):
        errors.append('Missing email address')
    elif existing_emails and user_data['email'] in existing_emails:
        errors.append('Email already exists in database')

    if not user_data.get('first_name'):
        warnings.append('Missing first name')

    if not user_data.get('last_name'):
        warnings.append('Missing last name')

    return {
        'user_data': user_data,
        'custom_data': custom_data,
        'newsletter_prefs': newsletter_prefs,
        'warnings': warnings,
        'errors': errors
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Members CSV Parser (Subscription Data)
|
||||
# ============================================================================
|
||||
|
||||
def parse_members_csv(file_path: str) -> Dict[str, Any]:
    """Parse a WordPress PMS members export into per-email subscription data.

    Args:
        file_path: Path to pms-export-members CSV file

    Returns:
        Dict keyed by lowercased user_email; each value holds the plan,
        parsed start/end dates, mapped status, payment method, and
        billing details for that member.

    Raises:
        Exception: Re-raised (after logging) if the CSV cannot be parsed.
    """
    subscriptions: Dict[str, Any] = {}

    # WordPress subscription status -> our status vocabulary; anything
    # unrecognized defaults to 'active'.
    status_map = {
        'active': 'active',
        'expired': 'expired',
        'abandoned': 'expired',
        'canceled': 'cancelled',
        'cancelled': 'cancelled',
    }

    try:
        frame = pd.read_csv(file_path)

        for _, record in frame.iterrows():
            email = str(record.get('user_email', '')).strip().lower()
            # Skip rows without an email ('nan' is pandas' missing value
            # rendered through str()).
            if not email or email == 'nan':
                continue

            raw_status = str(record.get('status', '')).lower().strip()
            mapped_status = status_map.get(raw_status, 'active')

            # Normalize the payment gateway to a small known set.
            gateway = str(record.get('payment_gateway', '')).lower().strip()
            if 'stripe' in gateway:
                method = 'stripe'
            elif 'paypal' in gateway:
                method = 'paypal'
            elif gateway in ('manual', 'admin', ''):
                method = 'manual'
            else:
                method = gateway or 'manual'

            subscriptions[email] = {
                'subscription_plan_id': record.get('subscription_plan_id'),
                'subscription_plan_name': record.get('subscription_plan_name'),
                'start_date': parse_date_various(record.get('start_date')),
                'end_date': parse_date_various(record.get('expiration_date')),
                'status': mapped_status,
                'payment_method': method,
                'wordpress_user_id': record.get('user_id'),
                'billing_first_name': record.get('billing_first_name'),
                'billing_last_name': record.get('billing_last_name'),
                'billing_address': record.get('billing_address'),
                'billing_city': record.get('billing_city'),
                'billing_state': record.get('billing_state'),
                'billing_zip': record.get('billing_zip'),
                'card_last4': record.get('billing_card_last4'),
            }

    except Exception as e:
        logger.error(f"Error parsing members CSV: {str(e)}")
        raise

    return subscriptions
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Payments CSV Parser (Payment History)
|
||||
# ============================================================================
|
||||
|
||||
def parse_payments_csv(file_path: str) -> Dict[str, List[Dict]]:
    """Parse a WordPress PMS payments export into per-email payment history.

    Args:
        file_path: Path to pms-export-payments CSV file

    Returns:
        Dict keyed by lowercased user_email; each value is a list of
        payment records with amounts in cents and mapped statuses.

    Raises:
        Exception: Re-raised (after logging) if the CSV cannot be parsed.
    """
    history: Dict[str, List[Dict]] = {}

    # WordPress payment status -> our status vocabulary; anything
    # unrecognized defaults to 'completed' (historical data).
    status_map = {
        'completed': 'completed',
        'pending': 'pending',
        'processing': 'pending',
        'failed': 'failed',
        'refunded': 'failed',
    }

    try:
        frame = pd.read_csv(file_path)

        for _, record in frame.iterrows():
            email = str(record.get('user_email', '')).strip().lower()
            if not email or email == 'nan':
                continue

            # Amounts arrive as display strings ("$1,234.56"); store cents.
            raw_amount = str(record.get('amount', '0')).replace('$', '').replace(',', '').strip()
            try:
                cents = int(float(raw_amount) * 100)
            except (ValueError, TypeError):
                cents = 0

            raw_status = str(record.get('status', '')).lower().strip()

            history.setdefault(email, []).append({
                'payment_id': record.get('payment_id'),
                'amount_cents': cents,
                'status': status_map.get(raw_status, 'completed'),
                'date': parse_date_various(record.get('date')),
                'payment_gateway': record.get('payment_gateway'),
                'transaction_id': record.get('transaction_id'),
                'profile_id': record.get('profile_id'),
                'subscription_plan_id': record.get('subscription_plan_id'),
                'wordpress_user_id': record.get('user_id'),
            })

    except Exception as e:
        logger.error(f"Error parsing payments CSV: {str(e)}")
        raise

    return history
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Comprehensive Import Analysis
|
||||
# ============================================================================
|
||||
|
||||
def analyze_comprehensive_import(
    users_csv_path: str,
    members_csv_path: Optional[str] = None,
    payments_csv_path: Optional[str] = None,
    existing_emails: Optional[set] = None
) -> Dict[str, Any]:
    """
    Analyze all CSV files for comprehensive import with cross-referencing.

    Members and payments CSVs (if given) are parsed first; each user row
    is then transformed and cross-referenced against them by lowercased
    email address. Failures in the optional CSVs are recorded in the
    result ('error' keys); a failure in the users CSV is re-raised.

    Args:
        users_csv_path: Path to WordPress users export CSV (required)
        members_csv_path: Path to PMS members CSV (optional)
        payments_csv_path: Path to PMS payments CSV (optional)
        existing_emails: Set of emails already in database

    Returns:
        Comprehensive analysis with preview data for all files
    """
    if existing_emails is None:
        existing_emails = set()

    result = {
        'users': {'total': 0, 'valid': 0, 'warnings': 0, 'errors': 0, 'preview': []},
        'members': {'total': 0, 'matched': 0, 'unmatched': 0, 'data': {}},
        'payments': {'total': 0, 'matched': 0, 'total_amount_cents': 0, 'data': {}},
        'summary': {
            'total_users': 0,
            'importable_users': 0,
            'duplicate_emails': 0,
            'users_with_subscriptions': 0,
            'users_with_payments': 0,
            'total_payment_amount': 0,
        }
    }

    # Parse members CSV if provided (non-fatal on failure)
    members_data = {}
    if members_csv_path:
        try:
            members_data = parse_members_csv(members_csv_path)
            result['members']['total'] = len(members_data)
            result['members']['data'] = members_data
        except Exception as e:
            result['members']['error'] = str(e)

    # Parse payments CSV if provided (non-fatal on failure)
    payments_data = {}
    if payments_csv_path:
        try:
            payments_data = parse_payments_csv(payments_csv_path)
            result['payments']['total'] = sum(len(p) for p in payments_data.values())
            result['payments']['data'] = payments_data
            result['payments']['total_amount_cents'] = sum(
                sum(p['amount_cents'] for p in payments)
                for payments in payments_data.values()
            )
        except Exception as e:
            result['payments']['error'] = str(e)

    # Parse users CSV (required; failures propagate to the caller)
    try:
        df = pd.read_csv(users_csv_path)
        result['users']['total'] = len(df)

        seen_emails = set()
        total_warnings = 0
        total_errors = 0

        for idx, row in df.iterrows():
            row_dict = row.to_dict()
            transformed = transform_csv_row_to_user_data(row_dict, existing_emails)

            email = transformed['user_data'].get('email', '').lower()

            # Check for CSV duplicates (within this file; database
            # duplicates are flagged by transform_csv_row_to_user_data)
            if email in seen_emails:
                transformed['errors'].append(f'Duplicate email in CSV')
            elif email:
                seen_emails.add(email)

            # Cross-reference with members data
            subscription_data = members_data.get(email)
            if subscription_data:
                result['members']['matched'] += 1

            # Cross-reference with payments data
            payment_records = payments_data.get(email, [])
            if payment_records:
                result['payments']['matched'] += 1

            # Parse WordPress roles for role/status suggestion
            wp_capabilities = row.get('wp_capabilities', '')
            wp_roles = parse_php_serialized(wp_capabilities)
            loaf_role, role_status = map_wordpress_role(wp_roles)

            # Determine status.
            # NOTE(review): 'pms_subscription_plan_63' is a hard-coded
            # role slug — presumably plan 63 is the paid membership plan
            # on the source site; confirm before reusing elsewhere.
            approval_status = str(row.get('wppb_approval_status', '')).strip()
            has_subscription = 'pms_subscription_plan_63' in wp_roles or subscription_data is not None

            # A status derived from the WordPress role takes precedence
            # over the approval/subscription heuristic.
            if role_status:
                suggested_status = role_status
            else:
                suggested_status = suggest_status(approval_status, has_subscription, loaf_role)

            # Build preview row
            preview_row = {
                'row_number': idx + 1,  # 1-based for display
                'email': email,
                'first_name': transformed['user_data'].get('first_name', ''),
                'last_name': transformed['user_data'].get('last_name', ''),
                'phone': transformed['user_data'].get('phone', ''),
                'date_of_birth': transformed['user_data'].get('date_of_birth').isoformat() if transformed['user_data'].get('date_of_birth') else None,
                'wordpress_user_id': transformed['user_data'].get('wordpress_user_id'),
                'wordpress_roles': wp_roles,
                'suggested_role': loaf_role,
                'suggested_status': suggested_status,
                'has_subscription': has_subscription,
                'subscription_data': subscription_data,
                'payment_count': len(payment_records),
                'total_paid_cents': sum(p['amount_cents'] for p in payment_records),
                'user_data': transformed['user_data'],
                'custom_data': transformed['custom_data'],
                'newsletter_prefs': transformed['newsletter_prefs'],
                'warnings': transformed['warnings'],
                'errors': transformed['errors'],
            }

            result['users']['preview'].append(preview_row)
            total_warnings += len(transformed['warnings'])
            total_errors += len(transformed['errors'])

            # A row is importable only if it produced no errors
            if not transformed['errors']:
                result['users']['valid'] += 1

        result['users']['warnings'] = total_warnings
        result['users']['errors'] = total_errors

        # Calculate unmatched members (member rows whose email never
        # appeared in the users CSV)
        user_emails = {p['email'] for p in result['users']['preview'] if p['email']}
        result['members']['unmatched'] = len(set(members_data.keys()) - user_emails)

        # Summary stats
        result['summary']['total_users'] = result['users']['total']
        result['summary']['importable_users'] = result['users']['valid']
        result['summary']['duplicate_emails'] = len(seen_emails & existing_emails)
        result['summary']['users_with_subscriptions'] = result['members']['matched']
        result['summary']['users_with_payments'] = result['payments']['matched']
        result['summary']['total_payment_amount'] = result['payments']['total_amount_cents']

    except Exception as e:
        logger.error(f"Error analyzing users CSV: {str(e)}")
        result['users']['error'] = str(e)
        raise

    return result
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CSV Analysis and Preview Generation
|
||||
# ============================================================================
|
||||
@@ -344,8 +1066,6 @@ def analyze_csv(file_path: str, existing_emails: Optional[set] = None) -> Dict:
|
||||
}
|
||||
}
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
# Read CSV with pandas
|
||||
df = pd.read_csv(file_path)
|
||||
|
||||
@@ -521,11 +1241,4 @@ def format_preview_for_display(preview_data: List[Dict], page: int = 1, page_siz
|
||||
# Module Initialization
|
||||
# ============================================================================
|
||||
|
||||
# Import pandas for CSV processing
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
logger.error("pandas library not found. Please install: pip install pandas")
|
||||
raise
|
||||
|
||||
logger.info("WordPress parser module loaded successfully")
|
||||
|
||||
Reference in New Issue
Block a user