# Files
# pdm/rollback_filedata.py
#
# 236 lines
# 8.7 KiB
# Python

"""
Rollback File Data Migration Script
Deletes VariableValue records from target database for documents that were migrated.
Uses the document mapping CSV to identify which target DocumentIDs to delete.
"""
import json
import logging
import csv
import sys
from datetime import datetime
from db_utils import DatabaseConnection
class FileDataRollback:
    """Delete VariableValue rows in the target database for migrated documents.

    The target DocumentIDs come from the ``TargetDocID`` column of a document
    mapping CSV. Every COUNT/DELETE statement issued by this class is limited
    to rows with ``ProjectID = 2``.

    Typical use is ``FileDataRollback(mapping_csv_path=...).run()``, which
    connects, loads the mapping, previews the impact, asks for an interactive
    "yes" and then deletes in batches with periodic commits.
    """

    # How many leading DocumentIDs get an individual count line in the preview.
    PREVIEW_LIMIT = 10
    # IDs per COUNT query; kept below the SQL parameter limit (2100).
    COUNT_BATCH_SIZE = 2000
    # Documents processed per logged delete batch.
    DELETE_BATCH_SIZE = 100
    # A commit is issued after this many delete batches.
    COMMIT_INTERVAL = 10

    def __init__(self, config_path='config.json', mapping_csv_path=None):
        """Initialize the rollback tool with configuration and mapping file.

        Args:
            config_path: Path of the JSON configuration file. Its
                ``target_db`` entry is later handed to ``DatabaseConnection``.
            mapping_csv_path: Path of the document mapping CSV (must contain
                a ``TargetDocID`` column). May be None here, but
                ``load_document_mappings`` refuses to run without it.

        Side effects:
            Calls ``logging.basicConfig`` with a timestamped log file in the
            current directory plus a stream handler.
        """
        with open(config_path, 'r') as f:
            self.config = json.load(f)
        self.target_conn = None
        self.mapping_csv_path = mapping_csv_path
        self.target_document_ids = []
        # Setup logging
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        log_filename = f"rollback_filedata_{self.timestamp}.log"
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_filename),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

    def connect_database(self):
        """Establish connection to target database."""
        self.logger.info("Connecting to target database...")
        self.target_conn = DatabaseConnection(self.config['target_db'])
        self.logger.info("Target database connection established.")

    def load_document_mappings(self):
        """Load document mappings from CSV file and extract target DocumentIDs.

        Unique integer values of the ``TargetDocID`` column are appended to
        ``self.target_document_ids`` in file order. Blank cells are skipped
        silently; non-integer cells are skipped with a warning.

        Raises:
            ValueError: If no CSV path was provided, or the file has no
                ``TargetDocID`` header (including a completely empty file).
        """
        if not self.mapping_csv_path:
            raise ValueError("No mapping CSV file path provided")
        self.logger.info(f"Loading document mappings from: {self.mapping_csv_path}")

        # Duplicate IDs would be counted once per COUNT batch they appear in
        # and deleted repeatedly, so only the first occurrence is kept.
        seen_ids = set(self.target_document_ids)

        # 'utf-8-sig' drops a leading BOM (otherwise the first header would be
        # '\ufeffTargetDocID'); newline='' is what the csv module expects.
        with open(self.mapping_csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)
            # fieldnames is None for an empty file; treat that as "no columns".
            fieldnames = reader.fieldnames or []
            if 'TargetDocID' not in fieldnames:
                raise ValueError(f"CSV must contain 'TargetDocID' column. Found: {fieldnames}")
            for row in reader:
                # Short rows yield None for missing columns.
                target_id = (row['TargetDocID'] or '').strip()
                if not target_id:
                    continue
                try:
                    doc_id = int(target_id)
                except ValueError:
                    self.logger.warning(f"Skipping invalid TargetID: {target_id}")
                    continue
                if doc_id not in seen_ids:
                    seen_ids.add(doc_id)
                    self.target_document_ids.append(doc_id)

        self.logger.info(f"Loaded {len(self.target_document_ids)} target DocumentIDs from mapping file")

    def preview_deletion(self):
        """Preview how many records will be deleted.

        Logs a per-document count for the first ``PREVIEW_LIMIT`` DocumentIDs
        that have data, then sums the matching rows over all loaded
        DocumentIDs using batched ``IN (...)`` queries.

        Returns:
            int: Total number of VariableValue rows matching the loaded
            DocumentIDs (0 when no IDs are loaded).
        """
        self.logger.info("=" * 50)
        self.logger.info("Previewing deletion impact...")
        self.logger.info("=" * 50)

        doc_ids = self.target_document_ids

        # Count records that will be deleted
        count_query = """
            SELECT COUNT(*) as RecordCount
            FROM VariableValue
            WHERE DocumentID = ? AND ProjectID = 2
        """
        for doc_id in doc_ids[:self.PREVIEW_LIMIT]:
            result = self.target_conn.execute_query(count_query, (doc_id,))
            count = result[0]['RecordCount'] if result else 0
            if count > 0:
                self.logger.info(f" DocumentID {doc_id}: {count} records")

        not_previewed = len(doc_ids) - self.PREVIEW_LIMIT
        if not_previewed > 0:
            self.logger.info(f" ... and {not_previewed} more documents")

        # Get total count for all documents
        self.logger.info("")
        self.logger.info("Calculating total impact across all documents...")

        # Count in batches to avoid SQL parameter limit (2100)
        total_count = 0
        for start in range(0, len(doc_ids), self.COUNT_BATCH_SIZE):
            batch = doc_ids[start:start + self.COUNT_BATCH_SIZE]
            placeholders = ','.join(['?'] * len(batch))
            batch_query = f"""
                SELECT COUNT(*) as BatchCount
                FROM VariableValue
                WHERE DocumentID IN ({placeholders}) AND ProjectID = 2
            """
            result = self.target_conn.execute_query(batch_query, tuple(batch))
            total_count += result[0]['BatchCount'] if result else 0

        self.logger.info("=" * 50)
        self.logger.info(f"TOTAL RECORDS TO BE DELETED: {total_count}")
        self.logger.info(f"Across {len(doc_ids)} mapped documents")
        self.logger.info("=" * 50)
        return total_count

    def _delete_document_records(self, delete_query, doc_id):
        """Run ``delete_query`` for one DocumentID and return the rows removed."""
        cursor = self.target_conn.connection.cursor()
        try:
            cursor.execute(delete_query, (doc_id,))
            # DB-API cursors report -1 when the affected-row count is unknown;
            # never let that subtract from the running total.
            return max(cursor.rowcount, 0)
        finally:
            # Close the cursor even when execute() raises.
            cursor.close()

    def rollback_variable_values(self):
        """Delete VariableValue records for all mapped target DocumentIDs.

        Shows the preview, requires the user to type "yes", then deletes one
        document at a time, committing every ``COMMIT_INTERVAL`` batches and
        once more at the end. A failure for a single document is logged and
        the run continues (best effort); the failures are summarised at the
        end.

        Returns:
            bool: True when the deletion phase ran to completion or there was
            nothing to delete; False when it was cancelled or could not be
            confirmed (non-interactive stdin).

        Raises:
            Exception: Any error outside the per-document delete (for example
                a failed commit) is re-raised after a transaction rollback
                has been attempted.
        """
        self.logger.info("=" * 50)
        self.logger.info("Starting Rollback of File Data")
        self.logger.info("=" * 50)

        # Get user confirmation
        total_to_delete = self.preview_deletion()
        if total_to_delete == 0:
            self.logger.info("No records found to delete. Exiting.")
            return True

        self.logger.info("")
        try:
            response = input(f"Are you sure you want to DELETE {total_to_delete} records? (yes/no): ").strip().lower()
        except EOFError:
            # Non-interactive mode
            self.logger.error("Cannot proceed without confirmation in non-interactive mode")
            return False
        if response != 'yes':
            self.logger.info("Rollback cancelled by user")
            return False

        # Perform deletion
        delete_query = """
            DELETE FROM VariableValue
            WHERE DocumentID = ? AND ProjectID = 2
        """
        doc_ids = self.target_document_ids
        batch_size = self.DELETE_BATCH_SIZE
        total_batches = (len(doc_ids) + batch_size - 1) // batch_size
        total_deleted = 0
        failed_doc_ids = []

        self.logger.info(f"Processing {len(doc_ids)} documents in batches of {batch_size}...")
        try:
            for batch_num, start in enumerate(range(0, len(doc_ids), batch_size), start=1):
                batch_deleted = 0
                for doc_id in doc_ids[start:start + batch_size]:
                    try:
                        batch_deleted += self._delete_document_records(delete_query, doc_id)
                    except Exception as e:
                        # Deliberate best effort: keep going with the next document.
                        failed_doc_ids.append(doc_id)
                        self.logger.error(f"Failed to delete records for DocumentID {doc_id}: {e}")
                total_deleted += batch_deleted
                self.logger.info(f"Batch {batch_num}/{total_batches}: Deleted {batch_deleted} records")

                # Commit every N batches
                if batch_num % self.COMMIT_INTERVAL == 0:
                    self.target_conn.commit()
                    self.logger.info(f"[COMMIT] Transaction committed at batch {batch_num}")

            # Final commit
            self.target_conn.commit()
            self.logger.info("[COMMIT] Final transaction committed")
            self.logger.info("=" * 50)
            self.logger.info("Rollback Complete!")
            self.logger.info(f"Total records deleted: {total_deleted}")
            if failed_doc_ids:
                self.logger.warning(
                    f"Deletion failed for {len(failed_doc_ids)} DocumentIDs: {failed_doc_ids}"
                )
            self.logger.info("=" * 50)
        except Exception as e:
            self.logger.error(f"Rollback failed: {e}")
            self.logger.error("Attempting to rollback transaction...")
            try:
                self.target_conn.connection.rollback()
            except Exception as rollback_error:
                # Do not let a failing rollback hide the original error.
                self.logger.error(f"Transaction rollback failed: {rollback_error}")
            raise
        return True

    def run(self):
        """Execute the rollback process.

        Connects to the target database, loads the mapping CSV and runs the
        deletion. The database connection is closed in all cases.

        Raises:
            Exception: Any failure is logged with its traceback and re-raised.
        """
        try:
            self.logger.info("=" * 50)
            self.logger.info("File Data Rollback Tool")
            self.logger.info("=" * 50)
            self.connect_database()
            self.load_document_mappings()
            completed = self.rollback_variable_values()
            self.logger.info("=" * 50)
            # Only an explicit False means "cancelled / not confirmed"; this
            # keeps overrides that still return None on the success path.
            if completed is False:
                self.logger.warning("Rollback process ended without deleting any records.")
            else:
                self.logger.info("Rollback process completed successfully!")
            self.logger.info("=" * 50)
        except Exception as e:
            self.logger.error(f"Rollback process failed: {e}", exc_info=True)
            raise
        finally:
            if self.target_conn:
                self.target_conn.close()
if __name__ == "__main__":
    # The mapping CSV path is the one required positional argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python rollback_filedata.py <mapping_csv_file>")
        print("Example: python rollback_filedata.py mapping_documents_filedata_20251208_075312.csv")
        sys.exit(1)

    # Configuration is always read from config.json in the working directory.
    FileDataRollback(config_path='config.json', mapping_csv_path=cli_args[0]).run()