Template-Based CSV Import System with R2 Storage

Solution: Updated backend/r2_storage.py:
  - Added ALLOWED_CSV_TYPES for CSV file validation
  - Added upload_bytes() method for uploading raw bytes to R2
  - Added download_file() method for retrieving files from R2
  - Added delete_multiple() method for bulk file deletion

  The comprehensive upload endpoint now stores each uploaded CSV in R2:
  r2_storage = get_r2_storage()
  for file_type, (content, filename) in file_contents.items():
      _, r2_key, _ = await r2_storage.upload_bytes(
          content=content,
          folder=f"imports/{job_id}",
          filename=f"{file_type}_{filename}",
          content_type='text/csv'
      )
      r2_keys[file_type] = r2_key
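
  The stored files can later be pulled back out of R2 when the import job runs. A minimal sketch, assuming the r2_keys mapping is persisted with the job (load_job_csvs is a hypothetical helper, not part of this commit):
  import csv
  import io

  async def load_job_csvs(r2_keys: dict[str, str]) -> dict[str, list[dict]]:
      """Download each stored CSV from R2 and parse it into row dicts."""
      r2_storage = get_r2_storage()
      parsed = {}
      for file_type, r2_key in r2_keys.items():
          raw = await r2_storage.download_file(r2_key)   # bytes from R2
          text = raw.decode('utf-8-sig')                 # tolerate a BOM from Excel exports
          parsed[file_type] = list(csv.DictReader(io.StringIO(text)))
      return parsed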

  ---
  2. Stripe Transaction ID Tracking

  Solution: Updated subscription and donation imports to capture Stripe metadata (a sketch of the mapping follows the field lists below):

  Subscription fields:
  - stripe_subscription_id
  - stripe_customer_id
  - stripe_payment_intent_id
  - stripe_invoice_id
  - stripe_charge_id
  - stripe_receipt_url
  - card_last4, card_brand, payment_method

  Donation fields:
  - stripe_payment_intent_id
  - stripe_charge_id
  - stripe_receipt_url
  - card_last4, card_brand
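
  A minimal sketch of the row-to-field mapping (the helper name and the source CSV column names are assumptions, not the commit's actual code):
  def extract_stripe_fields(row: dict, is_subscription: bool) -> dict:
      """Pull Stripe metadata out of a parsed CSV row, dropping blank values."""
      fields = {
          'stripe_payment_intent_id': row.get('payment_intent_id'),
          'stripe_charge_id': row.get('charge_id'),
          'stripe_receipt_url': row.get('receipt_url'),
          'card_last4': row.get('card_last4'),
          'card_brand': row.get('card_brand'),
      }
      if is_subscription:
          fields.update({
              'stripe_subscription_id': row.get('subscription_id'),
              'stripe_customer_id': row.get('customer_id'),
              'stripe_invoice_id': row.get('invoice_id'),
              'payment_method': row.get('payment_method'),
          })
      return {k: v for k, v in fields.items() if v}  # keep only populated values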

  ---
  3. Fixed JSON Serialization Error

  Problem: "Object of type datetime is not JSON serializable" raised when saving import metadata.

  Solution: Added serialize_for_json() helper in backend/server.py:
  from datetime import datetime, date

  def serialize_for_json(obj):
      """Recursively convert datetime objects to ISO strings for JSON serialization."""
      if isinstance(obj, (datetime, date)):
          return obj.isoformat()
      elif isinstance(obj, dict):
          return {k: serialize_for_json(v) for k, v in obj.items()}
      elif isinstance(obj, list):
          return [serialize_for_json(item) for item in obj]
      # ... handles other types
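
  Usage is then a straight pass through the helper before dumping; a short illustrative example (the metadata shape here is made up):
  import json
  from datetime import datetime, timezone

  metadata = {'job_id': 'abc123', 'started_at': datetime.now(timezone.utc)}
  json.dumps(serialize_for_json(metadata))  # no TypeError: datetimes are now ISO strings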

  ---
  4. Fixed Route Ordering (401 Unauthorized)

  Problem: /admin/import/comprehensive/upload returned 401 Unauthorized because FastAPI matched the literal segment "comprehensive" against the {job_id} parameter of a generic route registered earlier.

  Solution: Moved comprehensive import routes BEFORE generic {job_id} routes in backend/server.py:
  # Correct order:
  @app.post("/api/admin/import/comprehensive/upload")  # Specific route FIRST
  # ... other comprehensive routes ...

  @app.get("/api/admin/import/{job_id}/preview")  # Generic route AFTER

  ---
  5. Improved Date Parsing

  Solution: Extended the recognized date formats in backend/wordpress_parser.py:
  formats = [
      '%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%B %d, %Y', '%b %d, %Y',
      '%Y-%m-%d %H:%M:%S',
      '%m/%Y',      # Month/Year: 01/2020
      '%m-%Y',      # Month-Year: 01-2020
      '%b-%Y',      # Short month-Year: Jan-2020
      '%B-%Y',      # Full month-Year: January-2020
  ]
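
  The parser typically tries each format in order; a hedged sketch built on the formats list above (parse_date is illustrative, not necessarily the module's real function name):
  from datetime import datetime
  from typing import Optional

  def parse_date(value: str) -> Optional[datetime]:
      """Try each known format in turn; return None if nothing matches."""
      for fmt in formats:
          try:
              return datetime.strptime(value.strip(), fmt)
          except ValueError:
              continue
      return None

  parse_date('Jan-2020')  # -> datetime(2020, 1, 1, 0, 0) via '%b-%Y'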
  ---
  Diff excerpt: backend/r2_storage.py

@@ -50,6 +50,14 @@ class R2Storage:
        'image/svg+xml': ['.svg']
    }

    # CSV files for imports
    ALLOWED_CSV_TYPES = {
        'text/csv': ['.csv'],
        'text/plain': ['.csv'],  # Some systems report CSV as text/plain
        'application/csv': ['.csv'],
        'application/vnd.ms-excel': ['.csv'],  # Old Excel type sometimes used for CSV
    }

    def __init__(self):
        """Initialize R2 client with credentials from environment"""
        self.account_id = os.getenv('R2_ACCOUNT_ID')
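
ALLOWED_CSV_TYPES can back a simple upload check; a hypothetical sketch, not part of this commit's diff:

    from pathlib import Path
    from fastapi import HTTPException

    def validate_csv_upload(filename: str, content_type: str) -> None:
        """Reject uploads whose MIME type / extension pair is not a known CSV shape."""
        allowed = R2Storage.ALLOWED_CSV_TYPES.get(content_type)
        if allowed is None:
            raise HTTPException(status_code=400, detail=f"Unsupported content type: {content_type}")
        if Path(filename).suffix.lower() not in allowed:
            raise HTTPException(status_code=400, detail="File extension does not match content type")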
@@ -240,6 +248,127 @@ class R2Storage:
        except ClientError:
            return False

    async def upload_bytes(
        self,
        content: bytes,
        folder: str,
        filename: str,
        content_type: str = 'text/csv'
    ) -> tuple[str, str, int]:
        """
        Upload raw bytes to R2 storage (useful for CSV imports)

        Args:
            content: Raw bytes to upload
            folder: Folder path in R2 (e.g., 'imports/job-id')
            filename: Original filename
            content_type: MIME type of the content

        Returns:
            tuple: (public_url, object_key, file_size_bytes)

        Raises:
            HTTPException: If upload fails
        """
        try:
            file_size = len(content)

            # Generate unique filename preserving original extension
            file_extension = Path(filename).suffix.lower() or '.csv'
            unique_filename = f"{uuid.uuid4()}{file_extension}"
            object_key = f"{folder}/{unique_filename}"

            # Upload to R2
            self.client.put_object(
                Bucket=self.bucket_name,
                Key=object_key,
                Body=content,
                ContentType=content_type,
                ContentLength=file_size
            )

            # Generate public URL
            public_url = self.get_public_url(object_key)
            return public_url, object_key, file_size

        except ClientError as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to upload to R2: {str(e)}"
            )
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Upload error: {str(e)}"
            )

    async def download_file(self, object_key: str) -> bytes:
        """
        Download a file from R2 storage

        Args:
            object_key: The S3 object key (path) of the file

        Returns:
            bytes: File content

        Raises:
            HTTPException: If download fails
        """
        try:
            response = self.client.get_object(
                Bucket=self.bucket_name,
                Key=object_key
            )
            return response['Body'].read()
        except ClientError as e:
            if e.response['Error']['Code'] == 'NoSuchKey':
                raise HTTPException(status_code=404, detail="File not found in storage")
            raise HTTPException(
                status_code=500,
                detail=f"Failed to download file from R2: {str(e)}"
            )
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Download error: {str(e)}"
            )

    async def delete_multiple(self, object_keys: list[str]) -> bool:
        """
        Delete multiple files from R2 storage

        Args:
            object_keys: List of S3 object keys to delete

        Returns:
            bool: True if successful

        Raises:
            HTTPException: If deletion fails
        """
        if not object_keys:
            return True

        try:
            # R2/S3 delete_objects accepts up to 1000 keys at once
            objects = [{'Key': key} for key in object_keys if key]
            if objects:
                self.client.delete_objects(
                    Bucket=self.bucket_name,
                    Delete={'Objects': objects}
                )
            return True
        except ClientError as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to delete files from R2: {str(e)}"
            )


# Singleton instance
_r2_storage = None